radeon_optimize.c revision 1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6
/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_dataflow.h"
301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler.h"
321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler_util.h"
331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_swizzle.h"
341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstruct src_clobbered_reads_cb_data {
361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_register_file File;
371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int Index;
381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int Mask;
391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_reader_data * ReaderData;
401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák};
411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
/*
 * Pointer to a function taking two instructions and an unsigned value and
 * returning nothing.
 * NOTE(review): by its name this rewrites a reader to use a presubtract
 * result; its users are not visible in this part of the file -- confirm.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
				     struct rc_instruction *,
				     unsigned int);
451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register combine;
491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	combine.File = inner.File;
501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	combine.Index = inner.Index;
511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	combine.RelAddr = inner.RelAddr;
521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (outer.Abs) {
531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		combine.Abs = 1;
541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		combine.Negate = outer.Negate;
551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	} else {
561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		combine.Abs = inner.Abs;
571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		combine.Negate ^= outer.Negate;
591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return combine;
621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						struct rc_src_register * src)
661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_register_file file = src->File;
681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_reader_data * reader_data = data;
691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if(!rc_inst_can_use_presub(inst,
711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				reader_data->Writer->U.I.PreSub.Opcode,
721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				rc_swizzle_to_writemask(src->Swizzle),
731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				src,
741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				&reader_data->Writer->U.I.PreSub.SrcReg[0],
751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		reader_data->Abort = 1;
771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return;
781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* XXX This could probably be handled better. */
811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (file == RC_FILE_ADDRESS) {
821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		reader_data->Abort = 1;
831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return;
841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* These instructions cannot read from the constants file.
871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * see radeonTransformTEX()
881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 */
891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				(inst->U.I.Opcode == RC_OPCODE_TEX ||
921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.Opcode == RC_OPCODE_TXB ||
931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.Opcode == RC_OPCODE_TXP ||
941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.Opcode == RC_OPCODE_TXD ||
951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.Opcode == RC_OPCODE_TXL ||
961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.Opcode == RC_OPCODE_KIL)){
971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		reader_data->Abort = 1;
981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return;
991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
1001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void src_clobbered_reads_cb(
1031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void * data,
1041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst,
1051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register * src)
1061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct src_clobbered_reads_cb_data * sc_data = data;
1081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (src->File == sc_data->File
1101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    && src->Index == sc_data->Index
1111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
1121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
1141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
1151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
1171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
1181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
1191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void is_src_clobbered_scan_write(
1221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void * data,
1231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst,
1241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_register_file file,
1251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int index,
1261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int mask)
1271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct src_clobbered_reads_cb_data sc_data;
1291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_reader_data * reader_data = data;
1301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	sc_data.File = file;
1311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	sc_data.Index = index;
1321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	sc_data.Mask = mask;
1331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	sc_data.ReaderData = reader_data;
1341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_for_all_reads_src(reader_data->Writer,
1351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák					src_clobbered_reads_cb, &sc_data);
1361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
1391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_reader_data reader_data;
1411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int i;
1421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
1441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst_mov->U.I.WriteALUResult ||
1451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst_mov->U.I.SaturateMode)
1461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return;
1471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Get a list of all the readers of this MOV instruction. */
1491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	reader_data.ExitOnAbort = 1;
1501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_get_readers(c, inst_mov, &reader_data,
1511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		       copy_propagate_scan_read, NULL,
1521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		       is_src_clobbered_scan_write);
1531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (reader_data.Abort || reader_data.ReaderCount == 0)
1551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return;
1561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Propagate the MOV instruction. */
1581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for (i = 0; i < reader_data.ReaderCount; i++) {
1591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_instruction * inst = reader_data.Readers[i].Inst;
1601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
1611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
1631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.PreSub = inst_mov->U.I.PreSub;
1641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
1651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Finally, remove the original MOV instruction */
1671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst_mov);
1681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
1711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Check if a source register is actually always the same
1721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * swizzle constant.
1731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
1741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_src_uniform_constant(struct rc_src_register src,
1751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		rc_swizzle * pswz, unsigned int * pnegate)
1761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	int have_used = 0;
1781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (src.File != RC_FILE_NONE) {
1801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		*pswz = 0;
1811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
1821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
1831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(unsigned int chan = 0; chan < 4; ++chan) {
1851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		unsigned int swz = GET_SWZ(src.Swizzle, chan);
1861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz < 4) {
1871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			*pswz = 0;
1881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 0;
1891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
1901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_UNUSED)
1911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
1921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (!have_used) {
1941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			*pswz = swz;
1951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			*pnegate = GET_BIT(src.Negate, chan);
1961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			have_used = 1;
1971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		} else {
1981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
1991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				*pswz = 0;
2001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				return 0;
2011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			}
2021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
2061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_mad(struct rc_instruction * inst)
2091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_swizzle swz = 0;
2111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int negate= 0;
2121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
2141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ZERO) {
2151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MUL;
2161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
2211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ONE) {
2221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_ADD;
2231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (negate)
2241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
2251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
2261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		} else if (swz == RC_SWIZZLE_ZERO) {
2281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
2301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
2351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ONE) {
2361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_ADD;
2371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (negate)
2381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
2391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
2401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		} else if (swz == RC_SWIZZLE_ZERO) {
2421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
2441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_mul(struct rc_instruction * inst)
2501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_swizzle swz = 0;
2521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int negate = 0;
2531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
2551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ONE) {
2561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
2581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (negate)
2591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
2601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		} else if (swz == RC_SWIZZLE_ZERO) {
2621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
2641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
2691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ONE) {
2701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (negate)
2721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
2731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		} else if (swz == RC_SWIZZLE_ZERO) {
2751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
2771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_add(struct rc_instruction * inst)
2831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_swizzle swz = 0;
2851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int negate = 0;
2861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
2881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ZERO) {
2891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
2911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
2931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
2961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ZERO) {
2971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.Opcode = RC_OPCODE_MOV;
2981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return;
2991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
3001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
3011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
3021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
3041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Replace 0.0, 1.0 and 0.5 immediate constants by their
3051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * respective swizzles. Simplify instructions like ADD dst, src, 0;
3061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
3071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
3081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
3091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
3101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int i;
3111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
3131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
3141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_constant * constant;
3151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_src_register newsrc;
3161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		int have_real_reference;
3171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		unsigned int chan;
3181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
3201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		for (chan = 0; chan < 4; ++chan)
3211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
3221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				break;
3231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (chan == 4) {
3241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
3251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
3261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
3271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		/* Convert immediates to swizzles. */
3291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
3301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		    inst->U.I.SrcReg[src].RelAddr ||
3311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
3321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
3331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		constant =
3351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
3361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (constant->Type != RC_CONSTANT_IMMEDIATE)
3381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
3391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		newsrc = inst->U.I.SrcReg[src];
3411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		have_real_reference = 0;
3421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		for (chan = 0; chan < 4; ++chan) {
3431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
3441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			unsigned int newswz;
3451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			float imm;
3461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			float baseimm;
3471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (swz >= 4)
3491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				continue;
3501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			imm = constant->u.Immediate[swz];
3521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			baseimm = imm;
3531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (imm < 0.0)
3541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				baseimm = -baseimm;
3551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (baseimm == 0.0) {
3571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				newswz = RC_SWIZZLE_ZERO;
3581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			} else if (baseimm == 1.0) {
3591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				newswz = RC_SWIZZLE_ONE;
3601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			} else if (baseimm == 0.5 && c->has_half_swizzles) {
3611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				newswz = RC_SWIZZLE_HALF;
3621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			} else {
3631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				have_real_reference = 1;
3641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				continue;
3651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			}
3661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			SET_SWZ(newsrc.Swizzle, chan, newswz);
3681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (imm < 0.0 && !newsrc.Abs)
3691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				newsrc.Negate ^= 1 << chan;
3701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
3711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (!have_real_reference) {
3731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			newsrc.File = RC_FILE_NONE;
3741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			newsrc.Index = 0;
3751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
3761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		/* don't make the swizzle worse */
3781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
3791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
3801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
3811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[src] = newsrc;
3831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
3841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Simplify instructions based on constants */
3861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode == RC_OPCODE_MAD)
3871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		constant_folding_mad(inst);
3881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* note: MAD can simplify to MUL or ADD */
3901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode == RC_OPCODE_MUL)
3911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		constant_folding_mul(inst);
3921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
3931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		constant_folding_add(inst);
3941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* In case this instruction has been converted, make sure all of the
3961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * registers that are no longer used are empty. */
3971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	opcode = rc_get_opcode_info(inst->U.I.Opcode);
3981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(i = opcode->NumSrcRegs; i < 3; i++) {
3991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
4001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
4011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
4041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * If src and dst use the same register, this function returns a writemask that
4051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * indicates wich components are read by src.  Otherwise zero is returned.
4061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
4071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic unsigned int src_reads_dst_mask(struct rc_src_register src,
4081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						struct rc_dst_register dst)
4091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (dst.File != src.File || dst.Index != src.Index) {
4111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
4121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
4131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return rc_swizzle_to_writemask(src.Swizzle);
4141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
4171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * in any of its channels.  Return 0 otherwise. */
4181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int src_has_const_swz(struct rc_src_register src) {
4191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	int chan;
4201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(chan = 0; chan < 4; chan++) {
4211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		unsigned int swz = GET_SWZ(src.Swizzle, chan);
4221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
4231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						|| swz == RC_SWIZZLE_ONE) {
4241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 1;
4251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
4261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
4271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 0;
4281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_scan_read(
4311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void * data,
4321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst,
4331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register * src)
4341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_reader_data * reader_data = data;
4361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_presubtract_op * presub_opcode = reader_data->CbData;
4371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (!rc_inst_can_use_presub(inst, *presub_opcode,
4391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			reader_data->Writer->U.I.DstReg.WriteMask,
4401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			src,
4411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			&reader_data->Writer->U.I.SrcReg[0],
4421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			&reader_data->Writer->U.I.SrcReg[1])) {
4431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		reader_data->Abort = 1;
4441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return;
4451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
4461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int presub_helper(
4491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c,
4501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_add,
4511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_presubtract_op presub_opcode,
4521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_presub_replace_fn presub_replace)
4531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_reader_data reader_data;
4551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int i;
4561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_presubtract_op cb_op = presub_opcode;
4571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	reader_data.CbData = &cb_op;
4591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	reader_data.ExitOnAbort = 1;
4601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
4611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						is_src_clobbered_scan_write);
4621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (reader_data.Abort || reader_data.ReaderCount == 0)
4641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
4651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(i = 0; i < reader_data.ReaderCount; i++) {
4671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		unsigned int src_index;
4681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_reader reader = reader_data.Readers[i];
4691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		const struct rc_opcode_info * info =
4701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				rc_get_opcode_info(reader.Inst->U.I.Opcode);
4711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
4731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
4741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				presub_replace(inst_add, reader.Inst, src_index);
4751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
4761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
4771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
4781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* This function assumes that inst_add->U.I.SrcReg[0] and
4811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * inst_add->U.I.SrcReg[1] aren't both negative. */
4821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_replace_add(
4831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_add,
4841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_reader,
4851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int src_index)
4861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_presubtract_op presub_opcode;
4881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
4891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		presub_opcode = RC_PRESUB_SUB;
4901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	else
4911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		presub_opcode = RC_PRESUB_ADD;
4921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst_add->U.I.SrcReg[1].Negate) {
4941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
4951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
4961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	} else {
4971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
4981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
4991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
5001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
5011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
5021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.PreSub.Opcode = presub_opcode;
5031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.SrcReg[src_index] =
5041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
5051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák					inst_reader->U.I.PreSub.SrcReg[0]);
5061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
5071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
5081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_presub_candidate(
5111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c,
5121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst)
5131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
5151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int i;
5161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int is_constant[2] = {0, 0};
5171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
5191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
5211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			|| inst->U.I.SaturateMode
5221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			|| inst->U.I.WriteALUResult) {
5231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
5241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
5251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* If both sources use a constant swizzle, then we can't convert it to
5271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * a presubtract operation.  In fact for the ADD and SUB presubtract
5281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * operations neither source can contain a constant swizzle.  This
5291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * specific case is checked in peephole_add_presub_add() when
5301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * we make sure the swizzles for both sources are equal, so we
5311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * don't need to worry about it here. */
5321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for (i = 0; i < 2; i++) {
5331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		int chan;
5341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		for (chan = 0; chan < 4; chan++) {
5351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			rc_swizzle swz =
5361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
5371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if (swz == RC_SWIZZLE_ONE
5381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák					|| swz == RC_SWIZZLE_ZERO
5391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák					|| swz == RC_SWIZZLE_HALF) {
5401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				is_constant[i] = 1;
5411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			}
5421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
5431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
5441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_constant[0] && is_constant[1])
5451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
5461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(i = 0; i < info->NumSrcRegs; i++) {
5481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_src_register src = inst->U.I.SrcReg[i];
5491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (src_reads_dst_mask(src, inst->U.I.DstReg))
5501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 0;
5511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		src.File = RC_FILE_PRESUB;
5531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
5541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 0;
5551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
5561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
5571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole_add_presub_add(
5601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c,
5611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_add)
5621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
5641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
5651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
5661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
5681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
5691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* src0 and src1 can't have absolute values */
5711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
5721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	        return 0;
5731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* presub_replace_add() assumes only one is negative */
5751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
5761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	        return 0;
5771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák        /* if src0 is negative, at least all bits of dstmask have to be set */
5791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
5801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	        return 0;
5811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák        /* if src1 is negative, at least all bits of dstmask have to be set */
5831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
5841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	        return 0;
5851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (!is_presub_candidate(c, inst_add))
5871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
5881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
5901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		rc_remove_instruction(inst_add);
5911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 1;
5921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
5931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 0;
5941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_replace_inv(
5971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_add,
5981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_reader,
5991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int src_index)
6001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* We must be careful not to modify inst_add, since it
6021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * is possible it will remain part of the program.*/
6031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
6041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
6051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
6061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
6071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						inst_reader->U.I.PreSub.SrcReg[0]);
6081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
6101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
6111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
6121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
6141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
6151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
6161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * of the add instruction must have the constatnt 1 swizzle.  This function
6171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * does not check const registers to see if their value is 1.0, so it should
6181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * be called after the constant_folding optimization.
6191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @return
6201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 	0 if the ADD instruction is still part of the program.
6211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 	1 if the ADD instruction is no longer part of the program.
6221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
6231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole_add_presub_inv(
6241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c,
6251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst_add)
6261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int i, swz;
6281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (!is_presub_candidate(c, inst_add))
6301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
6311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Check if src0 is 1. */
6331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* XXX It would be nice to use is_src_uniform_constant here, but that
6341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * function only works if the register's file is RC_FILE_NONE */
6351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(i = 0; i < 4; i++ ) {
6361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
6371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
6381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						&& swz != RC_SWIZZLE_ONE) {
6391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 0;
6401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
6411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
6421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Check src1. */
6441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
6451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák						inst_add->U.I.DstReg.WriteMask
6461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		|| inst_add->U.I.SrcReg[1].Abs
6471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
6481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
6491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
6501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
6521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
6531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
6551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		rc_remove_instruction(inst_add);
6561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 1;
6571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
6581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 0;
6591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
6601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
6621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @return
6631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 	0 if inst is still part of the program.
6641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 	1 if inst is no longer part of the program.
6651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
6661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
6671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	switch(inst->U.I.Opcode){
6691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_ADD:
6701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (c->has_presub) {
6711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if(peephole_add_presub_inv(c, inst))
6721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				return 1;
6731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			if(peephole_add_presub_add(c, inst))
6741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				return 1;
6751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
6761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		break;
6771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	default:
6781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		break;
6791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
6801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 0;
6811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
6821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákvoid rc_optimize(struct radeon_compiler * c, void *user)
6841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst = c->Program.Instructions.Next;
6861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	while(inst != &c->Program.Instructions) {
6871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_instruction * cur = inst;
6881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst = inst->Next;
6891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		constant_folding(c, cur);
6911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if(peephole(c, cur))
6931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
6941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
6961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			copy_propagate(c, cur);
6971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			/* cur may no longer be part of the program */
6981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
6991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
7001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
701