1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright (C) 2009 Nicolai Haehnle. 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright 2010 Tom Stellard <tstellar@gmail.com> 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * All Rights Reserved. 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a copy of this software and associated documentation files (the 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Software"), to deal in the Software without restriction, including 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * without limitation the rights to use, copy, modify, merge, publish, 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * distribute, sublicense, and/or sell copies of the Software, and to 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * permit persons to whom the Software is furnished to do so, subject to 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the following conditions: 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * next paragraph) shall be included in all copies or substantial 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * portions of the Software. 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_dataflow.h" 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler.h" 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler_util.h" 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_list.h" 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_swizzle.h" 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_variable.h" 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct src_clobbered_reads_cb_data { 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file File; 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int Index; 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int Mask; 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data * ReaderData; 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtypedef void (*rc_presub_replace_fn)(struct rc_instruction *, 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction *, 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int); 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register combine; 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.File = inner.File; 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Index = inner.Index; 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.RelAddr = inner.RelAddr; 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (outer.Abs) { 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Abs = 1; 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Negate = outer.Negate; 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Abs = inner.Abs; 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Negate ^= outer.Negate; 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return combine; 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * src) 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file = src->File; 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data * reader_data = data; 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!rc_inst_can_use_presub(inst, 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Writer->U.I.PreSub.Opcode, 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle_to_writemask(src->Swizzle), 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src, 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &reader_data->Writer->U.I.PreSub.SrcReg[0], 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX This could probably be handled better. */ 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (file == RC_FILE_ADDRESS) { 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* These instructions cannot read from the constants file. 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * see radeonTransformTEX() 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (inst->U.I.Opcode == RC_OPCODE_TEX || 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode == RC_OPCODE_TXB || 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode == RC_OPCODE_TXP || 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode == RC_OPCODE_TXD || 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode == RC_OPCODE_TXL || 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode == RC_OPCODE_KIL)){ 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void src_clobbered_reads_cb( 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * data, 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * src) 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct src_clobbered_reads_cb_data * sc_data = data; 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src->File == sc_data->File 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && src->Index == sc_data->Index 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void is_src_clobbered_scan_write( 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * data, 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int index, 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int mask) 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct src_clobbered_reads_cb_data sc_data; 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data * reader_data = data; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sc_data.File = file; 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sc_data.Index = index; 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sc_data.Mask = mask; 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sc_data.ReaderData = reader_data; 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_reads_src(reader_data->Writer, 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src_clobbered_reads_cb, &sc_data); 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data reader_data; 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mov->U.I.WriteALUResult || 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mov->U.I.SaturateMode) 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Get a list of all the readers of this MOV instruction. */ 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data.ExitOnAbort = 1; 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_readers(c, inst_mov, &reader_data, 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org copy_propagate_scan_read, NULL, 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_src_clobbered_scan_write); 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reader_data.Abort || reader_data.ReaderCount == 0) 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Propagate the MOV instruction. */ 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < reader_data.ReaderCount; i++) { 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst = reader_data.Readers[i].Inst; 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.PreSub = inst_mov->U.I.PreSub; 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Finally, remove the original MOV instruction */ 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(inst_mov); 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Check if a source register is actually always the same 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * swizzle constant. 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int is_src_uniform_constant(struct rc_src_register src, 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle * pswz, unsigned int * pnegate) 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int have_used = 0; 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src.File != RC_FILE_NONE) { 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *pswz = 0; 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(unsigned int chan = 0; chan < 4; ++chan) { 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int swz = GET_SWZ(src.Swizzle, chan); 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz < 4) { 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *pswz = 0; 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_UNUSED) 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!have_used) { 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *pswz = swz; 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *pnegate = GET_BIT(src.Negate, chan); 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org have_used = 1; 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *pswz = 0; 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding_mad(struct rc_instruction * inst) 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle swz = 0; 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int negate= 0; 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ZERO) { 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MUL; 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ONE) { 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_ADD; 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (negate) 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (swz == RC_SWIZZLE_ZERO) { 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ONE) { 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_ADD; 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (negate) 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (swz == RC_SWIZZLE_ZERO) { 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding_mul(struct rc_instruction * inst) 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle swz = 0; 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int negate = 0; 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ONE) { 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (negate) 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (swz == RC_SWIZZLE_ZERO) { 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ONE) { 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (negate) 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (swz == RC_SWIZZLE_ZERO) { 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding_add(struct rc_instruction * inst) 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle swz = 0; 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int negate = 0; 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ZERO) { 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ZERO) { 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.Opcode = RC_OPCODE_MOV; 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Replace 0.0, 1.0 and 0.5 immediate constants by their 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * respective swizzles. Simplify instructions like ADD dst, src, 0; 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_constant * constant; 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register newsrc; 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int have_real_reference; 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int chan; 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 4; ++chan) 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (chan == 4) { 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[src].File = RC_FILE_NONE; 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Convert immediates to swizzles. */ 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[src].RelAddr || 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant = 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (constant->Type != RC_CONSTANT_IMMEDIATE) 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newsrc = inst->U.I.SrcReg[src]; 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org have_real_reference = 0; 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 4; ++chan) { 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int newswz; 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float imm; 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float baseimm; 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz >= 4) 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm = constant->u.Immediate[swz]; 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org baseimm = imm; 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm < 0.0) 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org baseimm = -baseimm; 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (baseimm == 0.0) { 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newswz = RC_SWIZZLE_ZERO; 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (baseimm == 1.0) { 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newswz = RC_SWIZZLE_ONE; 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (baseimm == 0.5 && c->has_half_swizzles) { 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newswz = RC_SWIZZLE_HALF; 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org have_real_reference = 1; 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SET_SWZ(newsrc.Swizzle, chan, newswz); 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm < 0.0 && !newsrc.Abs) 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newsrc.Negate ^= 1 << chan; 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!have_real_reference) { 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newsrc.File = RC_FILE_NONE; 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newsrc.Index = 0; 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* don't make the swizzle worse */ 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[src] = newsrc; 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Simplify instructions based on constants */ 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->U.I.Opcode == RC_OPCODE_MAD) 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant_folding_mad(inst); 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* note: MAD can simplify to MUL or ADD */ 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->U.I.Opcode == RC_OPCODE_MUL) 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant_folding_mul(inst); 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (inst->U.I.Opcode == RC_OPCODE_ADD) 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant_folding_add(inst); 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* In case this instruction has been converted, make sure all of the 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers that are no longer used are empty. */ 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode = rc_get_opcode_info(inst->U.I.Opcode); 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = opcode->NumSrcRegs; i < 3; i++) { 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * If src and dst use the same register, this function returns a writemask that 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * indicates wich components are read by src. Otherwise zero is returned. 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic unsigned int src_reads_dst_mask(struct rc_src_register src, 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_dst_register dst) 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst.File != src.File || dst.Index != src.Index) { 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return rc_swizzle_to_writemask(src.Swizzle); 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in any of its channels. Return 0 otherwise. */ 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int src_has_const_swz(struct rc_src_register src) { 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int chan; 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(chan = 0; chan < 4; chan++) { 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int swz = GET_SWZ(src.Swizzle, chan); 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || swz == RC_SWIZZLE_ONE) { 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_scan_read( 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * data, 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * src) 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data * reader_data = data; 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_presubtract_op * presub_opcode = reader_data->CbData; 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!rc_inst_can_use_presub(inst, *presub_opcode, 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Writer->U.I.DstReg.WriteMask, 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src, 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &reader_data->Writer->U.I.SrcReg[0], 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &reader_data->Writer->U.I.SrcReg[1])) { 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int presub_helper( 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * c, 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_add, 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_presubtract_op presub_opcode, 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_presub_replace_fn presub_replace) 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data reader_data; 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_presubtract_op cb_op = presub_opcode; 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data.CbData = &cb_op; 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data.ExitOnAbort = 1; 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_src_clobbered_scan_write); 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reader_data.Abort || reader_data.ReaderCount == 0) 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < reader_data.ReaderCount; i++) { 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int src_index; 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader reader = reader_data.Readers[i]; 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * info = 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_opcode_info(reader.Inst->U.I.Opcode); 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org presub_replace(inst_add, reader.Inst, src_index); 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* This function assumes that inst_add->U.I.SrcReg[0] and 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * inst_add->U.I.SrcReg[1] aren't both negative. */ 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_replace_add( 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_add, 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_reader, 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int src_index) 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_presubtract_op presub_opcode; 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org presub_opcode = RC_PRESUB_SUB; 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org presub_opcode = RC_PRESUB_ADD; 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[1].Negate) { 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.Opcode = presub_opcode; 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.SrcReg[src_index] = 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chain_srcregs(inst_reader->U.I.SrcReg[src_index], 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0]); 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int is_presub_candidate( 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * c, 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst) 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int is_constant[2] = {0, 0}; 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(inst->U.I.Opcode == RC_OPCODE_ADD); 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || inst->U.I.SaturateMode 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || inst->U.I.WriteALUResult 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || inst->U.I.Omod) { 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If both sources use a constant swizzle, then we can't convert it to 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a presubtract operation. In fact for the ADD and SUB presubtract 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * operations neither source can contain a constant swizzle. This 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * specific case is checked in peephole_add_presub_add() when 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we make sure the swizzles for both sources are equal, so we 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * don't need to worry about it here. */ 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 2; i++) { 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int chan; 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 4; chan++) { 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle swz = 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_ONE 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || swz == RC_SWIZZLE_ZERO 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || swz == RC_SWIZZLE_HALF) { 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_constant[i] = 1; 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_constant[0] && is_constant[1]) 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < info->NumSrcRegs; i++) { 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register src = inst->U.I.SrcReg[i]; 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src_reads_dst_mask(src, inst->U.I.DstReg)) 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src.File = RC_FILE_PRESUB; 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole_add_presub_add( 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * c, 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_add) 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* src0 and src1 can't have absolute values */ 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* presub_replace_add() assumes only one is negative */ 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* if src0 is negative, at least all bits of dstmask have to be set */ 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* if src1 is negative, at least all bits of dstmask have to be set */ 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!is_presub_candidate(c, inst_add)) 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(inst_add); 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_replace_inv( 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_add, 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_reader, 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int src_index) 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We must be careful not to modify inst_add, since it 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * is possible it will remain part of the program.*/ 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.PreSub.SrcReg[0]); 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the add instruction must have the constatnt 1 swizzle. This function 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * does not check const registers to see if their value is 1.0, so it should 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * be called after the constant_folding optimization. 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 0 if the ADD instruction is still part of the program. 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1 if the ADD instruction is no longer part of the program. 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole_add_presub_inv( 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * c, 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_add) 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i, swz; 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!is_presub_candidate(c, inst_add)) 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Check if src0 is 1. */ 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX It would be nice to use is_src_uniform_constant here, but that 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * function only works if the register's file is RC_FILE_NONE */ 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < 4; i++ ) { 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(((1 << i) & inst_add->U.I.DstReg.WriteMask) 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && swz != RC_SWIZZLE_ONE) { 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Check src1. */ 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_add->U.I.DstReg.WriteMask 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || inst_add->U.I.SrcReg[1].Abs 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(inst_add); 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct peephole_mul_cb_data { 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_dst_register * Writer; 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int Clobbered; 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void omod_filter_reader_cb( 670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * userdata, 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int index, 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int mask) 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct peephole_mul_cb_data * d = userdata; 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rc_src_reads_dst_mask(file, mask, index, 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org d->Clobbered = 1; 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void omod_filter_writer_cb( 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * userdata, 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, 688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int index, 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int mask) 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct peephole_mul_cb_data * d = userdata; 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (file == d->Writer->File && index == d->Writer->Index && 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (mask & d->Writer->WriteMask)) { 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org d->Clobbered = 1; 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole_mul_omod( 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * c, 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_mul, 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list * var_list) 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int chan = 0, swz, i; 704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int const_index = -1; 705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int temp_index = -1; 706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float const_value; 707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_omod_op omod_op = RC_OMOD_DISABLE; 708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list * writer_list; 709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_variable * var; 710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct peephole_mul_cb_data cb_data; 711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 2; i++) { 713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int j; 714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (temp_index != -1) { 720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The instruction has two temp sources */ 721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp_index = i; 724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If we get this far Src[i] must be a constant src */ 728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst_mul->U.I.SrcReg[i].Negate) { 729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The constant src needs to read from the same swizzle */ 732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swz = RC_SWIZZLE_UNUSED; 733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan = 0; 734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 0; j < 4; j++) { 735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int j_swz = 736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (j_swz == RC_SWIZZLE_UNUSED) { 738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz == RC_SWIZZLE_UNUSED) { 741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swz = j_swz; 742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan = j; 743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (j_swz != swz) { 744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (const_index != -1) { 749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The instruction has two constant sources */ 750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const_index = i; 753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mul->U.I.SrcReg[const_index].Index)) { 758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const_value = rc_get_constant_value(c, 761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mul->U.I.SrcReg[const_index].Index, 762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mul->U.I.SrcReg[const_index].Swizzle, 763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mul->U.I.SrcReg[const_index].Negate, 764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan); 765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (const_value == 2.0f) { 767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org omod_op = RC_OMOD_MUL_2; 768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (const_value == 4.0f) { 769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org omod_op = RC_OMOD_MUL_4; 770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (const_value == 8.0f) { 771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org omod_op = RC_OMOD_MUL_8; 772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (const_value == (1.0f / 2.0f)) { 773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org omod_op = RC_OMOD_DIV_2; 774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (const_value == (1.0f / 4.0f)) { 775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org omod_op = RC_OMOD_DIV_4; 776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (const_value == (1.0f / 8.0f)) { 777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org omod_op = RC_OMOD_DIV_8; 778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org writer_list = rc_variable_list_get_writers_one_reader(var_list, 783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!writer_list) { 786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cb_data.Clobbered = 0; 790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cb_data.Writer = &inst_mul->U.I.DstReg; 791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (var = writer_list->Item; var; var = var->Friend) { 792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst; 793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * info = rc_get_opcode_info( 794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org var->Inst->U.I.Opcode); 795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (info->HasTexture) { 796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (inst = inst_mul->Prev; inst != var->Inst; 802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = inst->Prev) { 803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_reads_mask(inst, omod_filter_reader_cb, 804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &cb_data); 805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_writes_mask(inst, omod_filter_writer_cb, 806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &cb_data); 807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (cb_data.Clobbered) { 808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (cb_data.Clobbered) { 814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Rewrite the instructions */ 818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (var = writer_list->Item; var; var = var->Friend) { 819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_variable * writer = writer_list->Item; 820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned conversion_swizzle = rc_make_conversion_swizzle( 821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org writer->Inst->U.I.DstReg.WriteMask, 822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_mul->U.I.DstReg.WriteMask); 823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org writer->Inst->U.I.Omod = omod_op; 824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; 825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; 826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); 827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(inst_mul); 831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 0 if inst is still part of the program. 838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1 if inst is no longer part of the program. 839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(inst->U.I.Opcode){ 843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_ADD: 844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->has_presub) { 845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(peephole_add_presub_inv(c, inst)) 846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(peephole_add_presub_add(c, inst)) 848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_optimize(struct radeon_compiler * c, void *user) 858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst = c->Program.Instructions.Next; 860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list * var_list; 861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(inst != &c->Program.Instructions) { 862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * cur = inst; 863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = inst->Next; 864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant_folding(c, cur); 866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(peephole(c, cur)) 868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (cur->U.I.Opcode == RC_OPCODE_MOV) { 871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org copy_propagate(c, cur); 872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* cur may no longer be part of the program */ 873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!c->has_omod) { 877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = c->Program.Instructions.Next; 881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(inst != &c->Program.Instructions) { 882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * cur = inst; 883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = inst->Next; 884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (cur->U.I.Opcode == RC_OPCODE_MUL) { 885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org var_list = rc_get_variables(c); 886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org peephole_mul_omod(c, cur, var_list); 887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 890