/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *	Tom Stellard <thomas.stellard@amd.com>
 *	Michel Dänzer <michel.daenzer@amd.com>
 *      Christian König <christian.koenig@amd.com>
 */

#include "gallivm/lp_bld_tgsi_action.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_tgsi.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "tgsi/tgsi_dump.h" 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeonsi_pipe.h" 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeonsi_shader.h" 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "si_state.h" 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "sid.h" 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <assert.h> 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <errno.h> 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <stdio.h> 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic ps_remap_inputs( 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_llvm_context * tl_ctx, 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tgsi_index, 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tgsi_chan) 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org : 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct si_input 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct list_head head; 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tgsi_index; 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tgsi_chan; 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned order; 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org*/ 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct si_shader_context 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_llvm_context radeon_bld; 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct r600_context *rctx; 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_parse_context parse; 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_token * tokens; 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_pipe_shader *shader; 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */ 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* unsigned num_inputs; */ 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* struct list_head inputs; */ 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* unsigned * input_mappings *//* From TGSI to SI hw */ 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* struct tgsi_shader_info info;*/ 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic struct si_shader_context * si_shader_context( 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_tgsi_context * bld_base) 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (struct si_shader_context *)bld_base; 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define PERSPECTIVE_BASE 0 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define LINEAR_BASE 9 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define SAMPLE_OFFSET 0 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define CENTER_OFFSET 2 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define CENTROID_OFSET 4 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define USE_SGPR_MAX_SUFFIX_LEN 5 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define CONST_ADDR_SPACE 2 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define USER_SGPR_ADDR_SPACE 8 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgenum sgpr_type { 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SGPR_CONST_PTR_F32, 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SGPR_CONST_PTR_V4I32, 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SGPR_CONST_PTR_V8I32, 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SGPR_I32, 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SGPR_I64 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param offset The offset parameter specifies the number of 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * elements to offset, not the number of bytes or dwords. 
An element is the 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the type pointed to by the base_ptr parameter (e.g. int is the element of 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * an int* pointer) 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * When LLVM lowers the load instruction, it will convert the element offset 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * into a dword offset automatically. 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic LLVMValueRef build_indexed_load( 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct gallivm_state * gallivm, 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef base_ptr, 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef offset) 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef computed_ptr = LLVMBuildGEP( 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org gallivm->builder, base_ptr, &offset, 1, ""); 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Load a value stored in one of the user SGPRs 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param sgpr This is the sgpr to load the value from. 
If you need to load a 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer), 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * then you should pass the index of the first SGPR that holds the value. For 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * use pass 2 for the sgpr parameter. 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The value of the sgpr parameter must also be aligned to the width of the type 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * being loaded, so that the sgpr parameter is divisible by the dword width of the 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * type. For example, if the value being loaded is two dwords wide, then the sgpr 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * parameter must be divisible by two. 
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic LLVMValueRef use_sgpr( 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct gallivm_state * gallivm, 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org enum sgpr_type type, 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned sgpr) 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef sgpr_index; 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef ret_type; 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef ptr; 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sgpr_index = lp_build_const_int32(gallivm, sgpr); 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (type) { 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SGPR_CONST_PTR_F32: 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sgpr % 2 == 0); 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMFloatTypeInContext(gallivm->context); 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SGPR_I32: 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMInt32TypeInContext(gallivm->context); 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SGPR_I64: 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sgpr % 
2 == 0); 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type= LLVMInt64TypeInContext(gallivm->context); 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SGPR_CONST_PTR_V4I32: 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sgpr % 2 == 0); 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMInt32TypeInContext(gallivm->context); 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMVectorType(ret_type, 4); 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SGPR_CONST_PTR_V8I32: 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sgpr % 2 == 0); 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMInt32TypeInContext(gallivm->context); 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMVectorType(ret_type, 8); 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"Unsupported SGPR type in use_sgpr()"); 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE); 
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, ""); 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return LLVMBuildLoad(gallivm->builder, ptr, ""); 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void declare_input_vs( 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader_context * si_shader_ctx, 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned input_index, 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct tgsi_full_declaration *decl) 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef t_list_ptr; 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef t_offset; 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef t_list; 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef attribute_offset; 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef buffer_index_reg; 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[3]; 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef vec4_type; 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef input; 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct r600_context *rctx = si_shader_ctx->rctx; 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org //struct pipe_vertex_element *velem = 
&rctx->vertex_elements->elements[input_index]; 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned chan; 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Load the T list */ 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Communicate with the rest of the driver about which SGPR the T# 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * list pointer is going to be stored in. Hard code to SGPR[6:7] for 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * now */ 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6); 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org t_offset = lp_build_const_int32(base->gallivm, input_index); 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset); 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Build the attribute offset */ 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org attribute_offset = lp_build_const_int32(base->gallivm, 0); 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Load the buffer index is always, which is always stored in VGPR0 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * for Vertex Shaders */ 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org buffer_index_reg = build_intrinsic(base->gallivm->builder, 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0, 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMReadNoneAttribute); 
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec4_type = LLVMVectorType(base->elem_type, 4); 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = t_list; 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = attribute_offset; 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = buffer_index_reg; 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org input = lp_build_intrinsic(base->gallivm->builder, 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.SI.vs.load.input", vec4_type, args, 3); 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Break up the vec4 into individual components */ 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 4; chan++) { 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan); 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Use a helper function for this. There is one in 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * tgsi_llvm.c. 
*/ 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] = 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildExtractElement(base->gallivm->builder, 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org input, llvm_chan, ""); 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void declare_input_fs( 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader_context * si_shader_ctx, 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned input_index, 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct tgsi_full_declaration *decl) 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char * intr_name; 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned chan; 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context * base = 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &si_shader_ctx->radeon_bld.soa.bld_base.base; 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct gallivm_state * gallivm = base->gallivm; 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* This value is: 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * [15:0] NewPrimMask (Bit mask for each quad. It is set it the 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * quad begins a new primitive. 
Bit 0 always needs 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to be unset) 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * [32:16] ParamOffset 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: This register number must be identical to the S_00B02C_USER_SGPR 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register field value 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6); 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Is this the input_index? */ 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index); 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Handle all possible interpolation modes */ 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (decl->Interp.Interpolate) { 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_INTERPOLATE_COLOR: 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Flat shading hangs the GPU */ 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si_shader_ctx->rctx->queued.named.rasterizer && 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if 0 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.constant"; 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#else 
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.linear.center"; 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (decl->Interp.Centroid) 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.persp.centroid"; 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.persp.center"; 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_INTERPOLATE_CONSTANT: 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Flat shading hangs the GPU */ 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if 0 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.constant"; 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_INTERPOLATE_LINEAR: 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (decl->Interp.Centroid) 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.linear.centroid"; 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.linear.center"; 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_INTERPOLATE_PERSPECTIVE: 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (decl->Interp.Centroid) 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
intr_name = "llvm.SI.fs.interp.persp.centroid"; 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intr_name = "llvm.SI.fs.interp.persp.center"; 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */ 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[3]; 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = llvm_chan; 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = attr_number; 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = params; 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx->radeon_bld.inputs[soa_index] = 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org build_intrinsic(base->gallivm->builder, intr_name, 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org input_type, args, 3, LLVMReadOnlyAttribute); 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void declare_input( 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_llvm_context * radeon_bld, 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned input_index, 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct tgsi_full_declaration *decl) 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader_context * si_shader_ctx = 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_context(&radeon_bld->soa.bld_base); 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org declare_input_vs(si_shader_ctx, input_index, decl); 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org declare_input_fs(si_shader_ctx, input_index, decl); 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "Warning: Unsupported shader type,\n"); 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic LLVMValueRef fetch_constant( 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_tgsi_context * bld_base, 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct tgsi_full_src_register *reg, 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org enum tgsi_opcode_type type, 
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned swizzle) 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context * base = &bld_base->base; 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef const_ptr; 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef offset; 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef load; 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Assume the pointer to the constant buffer is being stored in 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SGPR[0:1] */ 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0); 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: This assumes that the constant buffer is not packed, so 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * CONST[0].x will have an offset of 0 and CONST[1].x will have an 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * offset of 4. 
*/ 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset = lp_build_const_int32(base->gallivm, 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (reg->Register.Index * 4) + swizzle); 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org load = build_indexed_load(base->gallivm, const_ptr, offset); 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bitcast(bld_base, type, load); 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* Initialize arguments for the shader export intrinsic */ 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_full_declaration *d, 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned index, 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned target, 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef *args) 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context *uint = 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context *base = &bld_base->base; 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned compressed = 0; 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned chan; 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si_shader_ctx->type == 
TGSI_PROCESSOR_FRAGMENT) { 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int cbuf = target - V_008DFC_SQ_EXP_MRT; 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (cbuf >= 0 && cbuf < 8) { 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct r600_context *rctx = si_shader_ctx->rctx; 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org compressed = (rctx->export_16bpc >> cbuf) & 0x1; 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (compressed) { 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Pixel shader needs to pack output values before export */ 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 2; chan++ ) { 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef *out_ptr = 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx->radeon_bld.soa.outputs[index]; 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = LLVMBuildLoad(base->gallivm->builder, 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org out_ptr[2 * chan], ""); 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = LLVMBuildLoad(base->gallivm->builder, 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org out_ptr[2 * chan + 1], ""); 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[chan + 5] = 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org build_intrinsic(base->gallivm->builder, 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.SI.packf16", 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMInt32TypeInContext(base->gallivm->context), 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args, 2, 
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMReadNoneAttribute); 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[chan + 7] = args[chan + 5]; 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Set COMPR flag */ 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[4] = uint->one; 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 4; chan++ ) { 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef out_ptr = 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx->radeon_bld.soa.outputs[index][chan]; 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* +5 because the first output value will be 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the 6th argument to the intrinsic. */ 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org out_ptr, ""); 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Clear COMPR flag */ 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[4] = uint->zero; 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: This controls which components of the output 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers actually get exported. (e.g bit 0 means export 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * X component, bit 1 means export Y component, etc.) 
I'm 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * hard coding this to 0xf for now. In the future, we might 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * want to do something else. */ 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = lp_build_const_int32(base->gallivm, 0xf); 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify whether the EXEC mask represents the valid mask */ 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = uint->zero; 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify whether this is the last export */ 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = uint->zero; 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify the target we are exporting */ 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[3] = lp_build_const_int32(base->gallivm, target); 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: We probably need to keep track of the output 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * values, so we know what we are passing to the next 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * stage. */ 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* XXX: This is partially implemented for VS only at this point. 
It is not complete */ 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader * shader = &si_shader_ctx->shader->shader; 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context * base = &bld_base->base; 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context * uint = 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_parse_context *parse = &si_shader_ctx->parse; 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef last_args[9] = { 0 }; 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned color_count = 0; 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned param_count = 0; 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while (!tgsi_parse_end_of_tokens(parse)) { 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_full_declaration *d = 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &parse->FullToken.FullDeclaration; 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[9]; 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned target; 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned index; 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tgsi_parse_token(parse); 
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (d->Declaration.File) { 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_FILE_INPUT: 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i = shader->ninput++; 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->input[i].name = d->Semantic.Name; 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->input[i].sid = d->Semantic.Index; 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->input[i].interpolate = d->Interp.Interpolate; 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->input[i].centroid = d->Interp.Centroid; 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_FILE_OUTPUT: 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i = shader->noutput++; 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->output[i].name = d->Semantic.Name; 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->output[i].sid = d->Semantic.Index; 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->output[i].interpolate = d->Interp.Interpolate; 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
for (index = d->Range.First; index <= d->Range.Last; index++) { 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Select the correct target */ 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(d->Semantic.Name) { 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_SEMANTIC_POSITION: 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org target = V_008DFC_SQ_EXP_POS; 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_SEMANTIC_COLOR: 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org target = V_008DFC_SQ_EXP_PARAM + param_count; 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->output[i].param_offset = param_count; 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org param_count++; 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org target = V_008DFC_SQ_EXP_MRT + color_count; 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org color_count++; 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TGSI_SEMANTIC_GENERIC: 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org target = V_008DFC_SQ_EXP_PARAM + param_count; 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->output[i].param_offset = param_count; 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org param_count++; 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org target = 0; 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
fprintf(stderr, 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "Warning: SI unhandled output type:%d\n", 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org d->Semantic.Name); 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_llvm_init_export_args(bld_base, d, index, target, args); 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ? 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (d->Semantic.Name == TGSI_SEMANTIC_POSITION) : 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) { 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (last_args[0]) { 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_intrinsic(base->gallivm->builder, 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.SI.export", 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMVoidTypeInContext(base->gallivm->context), 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args, 9); 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcpy(last_args, args, sizeof(args)); 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_intrinsic(base->gallivm->builder, 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.SI.export", 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMVoidTypeInContext(base->gallivm->context), 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args, 9); 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!last_args[0]) { 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify which components to enable */ 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[0] = lp_build_const_int32(base->gallivm, 0x0); 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify the target we are exporting */ 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Set COMPR flag to zero to export data as 32-bit */ 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[4] = uint->zero; 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* dummy bits */ 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[5]= uint->zero; 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[6]= uint->zero; 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[7]= uint->zero; 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[8]= uint->zero; 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify whether the EXEC mask represents the valid mask */ 
573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[1] = lp_build_const_int32(base->gallivm, 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Specify that this is the last export */ 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args[2] = lp_build_const_int32(base->gallivm, 1); 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_intrinsic(base->gallivm->builder, 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.SI.export", 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMVoidTypeInContext(base->gallivm->context), 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_args, 9); 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* XXX: Look up what this function does */ 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/ 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void tex_fetch_args( 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_tgsi_context * bld_base, 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_emit_data * emit_data) 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct tgsi_full_instruction * inst = emit_data->inst; 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef ptr; 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef offset; 
595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* WriteMask */ 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/ 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf); 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Coordinates */ 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: Not all sample instructions need 4 address arguments. */ 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef src_w; 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned chan; 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef coords[4]; 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 3; chan++ ) { 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef arg = lp_build_emit_fetch(bld_base, 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->inst, 0, chan); 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org coords[chan] = lp_build_emit_llvm_binary(bld_base, 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TGSI_OPCODE_DIV, 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg, src_w); 
616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org coords[3] = bld_base->base.one; 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm, 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org coords, 4); 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 0, LP_CHAN_ALL); 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Resource */ 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4); 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset = lp_build_const_int32(bld_base->base.gallivm, 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->inst->Src[1].Register.Index); 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->args[2] = build_indexed_load(bld_base->base.gallivm, 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr, offset); 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Sampler */ 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2); 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset = lp_build_const_int32(bld_base->base.gallivm, 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->inst->Src[1].Register.Index); 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->args[3] = build_indexed_load(bld_base->base.gallivm, 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr, offset); 
637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Dimensions */ 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: We might want to pass this information to the shader at some. */ 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->inst->Texture.Texture); 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org*/ 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->arg_count = 4; 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: To optimize, we could use a float or v2f32, if the last bits of 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the writemask are clear */ 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_data->dst_type = LLVMVectorType( 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMFloatTypeInContext(bld_base->base.gallivm->context), 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 4); 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic const struct lp_build_tgsi_action tex_action = { 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .fetch_args = tex_fetch_args, 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .emit = lp_build_tgsi_intrinsic, 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .intr_name = "llvm.SI.sample" 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint si_pipe_shader_create( 
660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct pipe_context *ctx, 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_pipe_shader *shader) 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct r600_context *rctx = (struct r600_context*)ctx; 664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_pipe_shader_selector *sel = shader->selector; 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct si_shader_context si_shader_ctx; 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct tgsi_shader_info shader_info; 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_tgsi_context * bld_base; 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMModuleRef mod; 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned char * inst_bytes; 670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned inst_byte_count; 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i; 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t *ptr; 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dump; 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE); 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&si_shader_ctx.radeon_bld, 0, sizeof(si_shader_ctx.radeon_bld)); 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org radeon_llvm_context_init(&si_shader_ctx.radeon_bld); 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tgsi_scan_shader(sel->tokens, &shader_info); 
682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld_base->info = &shader_info; 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld_base->emit_epilogue = si_llvm_emit_epilogue; 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; 688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx.radeon_bld.load_input = declare_input; 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx.tokens = sel->tokens; 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx.shader = shader; 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_shader_ctx.rctx = rctx; 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs; 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Dump TGSI code before doing TGSI->LLVM conversion in case the 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * conversion fails. 
*/ 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dump) { 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tgsi_dump(sel->tokens, 0); 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { 705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); 706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return -EINVAL; 707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); 710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod = bld_base->base.gallivm->module; 712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dump) { 713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMDumpModule(mod); 714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump); 716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dump) { 717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "SI CODE:\n"); 718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < inst_byte_count; i+=4 ) { 719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3], 720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_bytes[i + 2], inst_bytes[i + 1], 721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_bytes[i]); 722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes); 726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4)); 727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8)); 728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org radeon_llvm_dispose(&si_shader_ctx.radeon_bld); 730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tgsi_parse_free(&si_shader_ctx.parse); 731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* copy new shader */ 733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_resource_reference(&shader->bo, NULL); 734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE, 735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_byte_count - 12); 736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (shader->bo == NULL) { 737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return -ENOMEM; 738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (0 /*R600_BIG_ENDIAN*/) { 742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < (inst_byte_count-12)/4; ++i) { 743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4)); 
744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcpy(ptr, inst_bytes + 12, inst_byte_count - 12); 747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rctx->ws->buffer_unmap(shader->bo->cs_buf); 749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org free(inst_bytes); 751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) 756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si_resource_reference(&shader->bo, NULL); 758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 759