/*
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * \file
 */

#include "radeon_emulate_loops.h"

#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"

#define VERBOSE 0
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct const_value { 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * C; 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * Src; 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float Value; 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int HasValue; 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct count_inst { 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * C; 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int Index; 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle Swz; 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float Amount; 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int Unknown; 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned BranchDepth; 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic unsigned int loop_max_possible_iterations(struct radeon_compiler *c, 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct loop_info * loop) 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int total_i = rc_recompute_ips(c); 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 
+1 because the program already has one iteration of the loop. */ 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1 + ((c->max_alu_insts - total_i) / loop_i); 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int iterations) 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * ptr; 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * first = loop->BeginLoop->Next; 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * last = loop->EndLoop->Prev; 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * append_to = last; 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->BeginLoop); 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->EndLoop); 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for( i = 1; i < iterations; i++){ 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(ptr = first; ptr != last->Next; ptr = ptr->Next){ 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction *new = rc_alloc_instruction(c); 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcpy(new, ptr, sizeof(struct rc_instruction)); 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_insert_instruction(append_to, new); 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org append_to = new; 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void update_const_value(void * data, struct rc_instruction * inst, 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, unsigned int index, unsigned int mask) 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct const_value * value = data; 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(value->Src->File != file || 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org value->Src->Index != index || 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(inst->U.I.Opcode){ 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_MOV: 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File, 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Index)){ 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org value->HasValue = 1; 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org value->Value = 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_constant_value(value->C, 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Index, 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Swizzle, 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
inst->U.I.SrcReg[0].Negate, 0); 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void get_incr_amount(void * data, struct rc_instruction * inst, 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, unsigned int index, unsigned int mask) 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct count_inst * count_inst = data; 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int amnt_src_index; 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * opcode; 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float amount; 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(file != RC_FILE_TEMPORARY || 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Index != index || 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (1 << GET_SWZ(count_inst->Swz,0) != mask)){ 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Give up if the counter is modified within an IF block. We 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * could handle this case with better analysis. 
*/ 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (count_inst->BranchDepth > 0) { 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Unknown = 1; 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Find the index of the counter register. */ 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode = rc_get_opcode_info(inst->U.I.Opcode); 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(opcode->NumSrcRegs != 2){ 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Unknown = 1; 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Index == count_inst->Index && 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org amnt_src_index = 1; 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[1].Index == count_inst->Index && 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org amnt_src_index = 0; 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else{ 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Unknown = 1; 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
return; 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(rc_src_reg_is_immediate(count_inst->C, 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[amnt_src_index].File, 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[amnt_src_index].Index)){ 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org amount = rc_get_constant_value(count_inst->C, 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[amnt_src_index].Index, 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[amnt_src_index].Swizzle, 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->U.I.SrcReg[amnt_src_index].Negate, 0); 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else{ 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Unknown = 1 ; 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(inst->U.I.Opcode){ 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_ADD: 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Amount += amount; 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SUB: 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(amnt_src_index == 0){ 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Unknown = 0; 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Amount -= amount; 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst->Unknown = 1; 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of how many iterations they have. 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int end_loops; 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int iterations; 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct count_inst count_inst; 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float limit_value; 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * counter; 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * limit; 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct const_value counter_value; 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst; 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Find the counter and the upper limit */ 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(rc_src_reg_is_immediate(c, 
loop->Cond->U.I.SrcReg[0].File, 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.SrcReg[0].Index)){ 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org limit = &loop->Cond->U.I.SrcReg[0]; 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org counter = &loop->Cond->U.I.SrcReg[1]; 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File, 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.SrcReg[1].Index)){ 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org limit = &loop->Cond->U.I.SrcReg[1]; 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org counter = &loop->Cond->U.I.SrcReg[0]; 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else{ 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("No constant limit.\n"); 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Find the initial value of the counter */ 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org counter_value.Src = counter; 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org counter_value.Value = 0.0f; 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org counter_value.HasValue = 0; 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org counter_value.C = c; 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = inst->Next){ 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_writes_mask(inst, update_const_value, &counter_value); 
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!counter_value.HasValue){ 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("Initial counter value cannot be determined.\n"); 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("Initial counter value is %f\n", counter_value.Value); 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Determine how the counter is modified each loop */ 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.C = c; 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.Index = counter->Index; 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.Swz = counter->Swizzle; 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.Amount = 0.0f; 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.Unknown = 0; 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.BranchDepth = 0; 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end_loops = 1; 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(inst->U.I.Opcode){ 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX In the future we might want to try to unroll nested 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loops here.*/ 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_BGNLOOP: 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end_loops++; 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_ENDLOOP: 
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->EndLoop = inst; 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end_loops--; 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_BRK: 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Don't unroll loops if it has a BRK instruction 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * other one used when testing the main conditional 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the loop. */ 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Make sure we haven't entered a nested loops. */ 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(inst != loop->Brk && end_loops == 1) { 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_IF: 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.BranchDepth++; 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_ENDIF: 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.BranchDepth--; 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(count_inst.Unknown){ 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Infinite loop */ 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(count_inst.Amount == 0.0f){ 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Calculate the number of iterations of this loop. Keeping this 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * simple, since we only support increment and decrement loops. 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle, 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org limit->Negate, 0); 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("Limit is %f.\n", limit_value); 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The iteration calculations are opposite of what you would expect. 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * In a normal loop, if the condition is met, then loop continues, but 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * with our loops, if the condition is met, the is exited. 
*/ 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(loop->Cond->U.I.Opcode){ 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SGE: 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SLE: 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org iterations = (int) ceilf((limit_value - counter_value.Value) / 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.Amount); 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SGT: 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SLT: 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org iterations = (int) floorf((limit_value - counter_value.Value) / 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org count_inst.Amount) + 1; 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->max_alu_insts > 0 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && iterations > loop_max_possible_iterations(c, loop)) { 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("Loop will have %d iterations.\n", iterations); 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Prepare loop for unrolling */ 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
rc_remove_instruction(loop->Cond); 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->If); 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->Brk); 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->EndIf); 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unroll_loop(c, loop, iterations); 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->EndLoop = NULL; 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param c 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param loop 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param inst A pointer to a BGNLOOP instruction. 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 1 if all of the members of loop where set. 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 0 if there was an error and some members of loop are still NULL. 
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst) 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * ptr; 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(loop, 0, sizeof(struct loop_info)); 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->BeginLoop = inst; 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ptr == &c->Program.Instructions) { 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org __FUNCTION__); 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(ptr->U.I.Opcode){ 
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_BGNLOOP: 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Nested loop, skip ahead to the end. */ 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int loop_depth = 1; 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(ptr = ptr->Next; ptr != &c->Program.Instructions; 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr = ptr->Next){ 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_depth++; 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!--loop_depth) { 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ptr == &c->Program.Instructions) { 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org __FUNCTION__); 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_BRK: 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || ptr->Prev->U.I.Opcode != RC_OPCODE_IF 
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || loop->Brk){ 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Brk = ptr; 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->If = ptr->Prev; 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->EndIf = ptr->Next; 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(loop->If->Prev->U.I.Opcode){ 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SLT: 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SGE: 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SGT: 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SLE: 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SEQ: 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SNE: 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond = loop->If->Prev; 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_ENDLOOP: 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->EndLoop = ptr; 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if 
(loop->BeginLoop && loop->Brk && loop->If && loop->EndIf 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && loop->Cond && loop->EndLoop) { 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This function prepares a loop to be unrolled by converting it into an if 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * statement. Here is an outline of the conversion process: 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * BGNLOOP; -> BGNLOOP; 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * <Additional conditional code> -> <Additional conditional code> 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IF temp[0]; -> IF temp[0]; 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * BRK; -> 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ENDIF; -> <Loop Body> 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * <Loop Body> -> ENDIF; 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ENDLOOP; -> ENDLOOP 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param inst A pointer to a BGNLOOP instruction. 
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 1 for success, 0 for failure 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int transform_loop(struct emulate_loop_state * s, 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst) 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct loop_info * loop; 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memory_pool_array_reserve(&s->C->Pool, struct loop_info, 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Loops, s->LoopCount, s->LoopReserved, 1); 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop = &s->Loops[s->LoopCount++]; 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!build_loop_info(s->C, loop, inst)) { 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(s->C, "Failed to build loop info\n"); 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(try_unroll_loop(s->C, loop)){ 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Reverse the conditional instruction */ 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(loop->Cond->U.I.Opcode){ 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SGE: 
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.Opcode = RC_OPCODE_SLT; 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SLT: 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.Opcode = RC_OPCODE_SGE; 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SLE: 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.Opcode = RC_OPCODE_SGT; 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SGT: 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.Opcode = RC_OPCODE_SLE; 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SEQ: 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.Opcode = RC_OPCODE_SNE; 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_SNE: 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(s->C, "loop->Cond is not a conditional.\n"); 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Prepare the loop to be emulated */ 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->Brk); 
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_remove_instruction(loop->EndIf); 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_transform_loops(struct radeon_compiler *c, void *user) 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct emulate_loop_state * s = &c->loop_state; 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * ptr; 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(s, 0, sizeof(struct emulate_loop_state)); 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->C = c; 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(ptr = s->C->Program.Instructions.Next; 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr != &s->C->Program.Instructions; ptr = ptr->Next) { 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(ptr->Type == RC_INSTRUCTION_NORMAL && 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!transform_loop(s, ptr)) 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_unroll_loops(struct radeon_compiler *c, void *user) 
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst; 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct loop_info loop; 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(inst = c->Program.Instructions.Next; 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst != &c->Program.Instructions; inst = inst->Next) { 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (build_loop_info(c, &loop, inst)) { 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org try_unroll_loop(c, &loop); 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_emulate_loops(struct radeon_compiler *c, void *user) 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct emulate_loop_state * s = &c->loop_state; 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Iterate backwards of the list of loops so that loops that nested 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loops are unrolled first. 
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for( i = s->LoopCount - 1; i >= 0; i-- ){ 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int iterations; 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!s->Loops[i].EndLoop){ 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unroll_loop(s->C, &s->Loops[i], iterations); 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 522