1f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/* 2f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Copyright © 2016 Bas Nieuwenhuizen 3f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 4f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Permission is hereby granted, free of charge, to any person obtaining a 5f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * copy of this software and associated documentation files (the "Software"), 6f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * to deal in the Software without restriction, including without limitation 7f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * and/or sell copies of the Software, and to permit persons to whom the 9f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Software is furnished to do so, subject to the following conditions: 10f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 11f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * The above copyright notice and this permission notice (including the next 12f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * paragraph) shall be included in all copies or substantial portions of the 13f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Software. 14f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 15f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * IN THE SOFTWARE. 22f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 23f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include "ac_nir_to_llvm.h" 258eabee9ec0c164bd18babfe15311db14040ad337Nicolai Hähnle#include "ac_llvm_util.h" 26f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include "ac_binary.h" 27f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include "sid.h" 28f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include "nir/nir.h" 29f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include "../vulkan/radv_descriptor_set.h" 30f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include "util/bitscan.h" 31f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#include <llvm-c/Transforms/Scalar.h> 32f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 33f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieenum radeon_llvm_calling_convention { 34f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie RADEON_LLVM_AMDGPU_VS = 87, 35f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie RADEON_LLVM_AMDGPU_GS = 88, 36f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie RADEON_LLVM_AMDGPU_PS = 89, 37f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie RADEON_LLVM_AMDGPU_CS = 90, 38f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie}; 39f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 40f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define CONST_ADDR_SPACE 2 41f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define LOCAL_ADDR_SPACE 3 42f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 43f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) 44f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) 45f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 46f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieenum desc_type { 47f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie DESC_IMAGE, 48f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie DESC_FMASK, 49f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie DESC_SAMPLER, 50f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie DESC_BUFFER, 51f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie}; 52f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 53f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestruct nir_to_llvm_context { 54a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle struct ac_llvm_context ac; 55f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct ac_nir_compiler_options *options; 56f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_shader_variant_info *shader_info; 57f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 58f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMContextRef context; 59f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMModuleRef module; 60f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuilderRef builder; 61f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef main_function; 62f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 63f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct hash_table *defs; 64f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct hash_table *phis; 65f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 66ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; 67f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef push_constants; 68f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef num_work_groups; 69f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef workgroup_ids; 70f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef local_invocation_ids; 71f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tg_size; 72f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 73f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef vertex_buffers; 74f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef base_vertex; 75f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef start_instance; 76f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef vertex_id; 77f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef rel_auto_id; 78f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef vs_prim_id; 79f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef instance_id; 80f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 81f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef prim_mask; 82f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef sample_positions; 83f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef persp_sample, persp_center, persp_centroid; 84f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef linear_sample, linear_center, linear_centroid; 85f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef front_face; 86f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ancillary; 87f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef frag_pos[4]; 88f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 89f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef continue_block; 90f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef break_block; 91f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 92f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef i1; 93f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef i8; 94f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef i16; 95f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef i32; 96f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef i64; 97f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v2i32; 98f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v3i32; 99f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v4i32; 100f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v8i32; 101f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef f32; 102f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef f16; 103f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v2f32; 104f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v4f32; 105f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef v16i8; 106f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef voidt; 107f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 108f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef i32zero; 109f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef i32one; 110f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef f32zero; 111f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef f32one; 112f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef v4f32empty; 113f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 114f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned range_md_kind; 115f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned uniform_md_kind; 116f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned invariant_load_md_kind; 117f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef empty_md; 118f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie gl_shader_stage stage; 119f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 120f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef lds; 121f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; 122f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; 123f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 124f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef shared_memory; 125f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie uint64_t input_mask; 126f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie uint64_t output_mask; 127f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int num_locals; 128f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *locals; 129f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool has_ddxy; 130f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_clips; 131f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_culls; 1325697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie 1335697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie bool has_ds_bpermute; 134f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie}; 135f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 136f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestruct ac_tex_info { 137f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[12]; 138f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int arg_count; 139f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef dst_type; 140f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool has_offset; 141f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie}; 142f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 143f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, 144f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_deref_var *deref, 145f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie enum desc_type desc_type); 146f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) 147f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 148f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return (index * 4) + chan; 149f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 150f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 151f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic unsigned llvm_get_type_size(LLVMTypeRef type) 152f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 153f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeKind kind = LLVMGetTypeKind(type); 154f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 155f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (kind) { 156f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case LLVMIntegerTypeKind: 157f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMGetIntTypeWidth(type) / 8; 158f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case LLVMFloatTypeKind: 159f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 4; 160f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case LLVMPointerTypeKind: 161f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 8; 162f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case LLVMVectorTypeKind: 163f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMGetVectorSize(type) * 164f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie llvm_get_type_size(LLVMGetElementType(type)); 165f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 166f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(0); 167f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 0; 168f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 169f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 170f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 171f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void set_llvm_calling_convention(LLVMValueRef func, 172f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie gl_shader_stage stage) 173f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 174f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie enum radeon_llvm_calling_convention calling_conv; 175f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 176f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (stage) { 177f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_VERTEX: 178f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_TESS_CTRL: 179f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_TESS_EVAL: 180f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie calling_conv = RADEON_LLVM_AMDGPU_VS; 181f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 182f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_GEOMETRY: 183f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie calling_conv = RADEON_LLVM_AMDGPU_GS; 184f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 185f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_FRAGMENT: 186f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie calling_conv = RADEON_LLVM_AMDGPU_PS; 187f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 188f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_COMPUTE: 189f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie calling_conv = RADEON_LLVM_AMDGPU_CS; 190f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 191f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 192f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unreachable("Unhandle shader type"); 193f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 194f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 195f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetFunctionCallConv(func, calling_conv); 196f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 197f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 198f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef 199f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliecreate_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, 200f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuilderRef builder, LLVMTypeRef *return_types, 201f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_return_elems, LLVMTypeRef *param_types, 2020fafe94a3904f3e20a566077d49983d33b909b17Dave Airlie unsigned param_count, unsigned array_params_mask, 203f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned sgpr_params, bool unsafe_math) 204f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 205f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef main_function_type, ret_type; 206f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef main_function_body; 207f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 208f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (num_return_elems) 209f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ret_type = LLVMStructTypeInContext(ctx, return_types, 210f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_return_elems, true); 211f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 212f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ret_type = LLVMVoidTypeInContext(ctx); 213f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 214f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Setup the function */ 215f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie main_function_type = 216f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMFunctionType(ret_type, param_types, param_count, 0); 217f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef main_function = 218f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddFunction(module, "main", main_function_type); 219f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie main_function_body = 220f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); 221f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(builder, main_function_body); 222f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 223f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); 224f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < sgpr_params; ++i) { 2250fafe94a3904f3e20a566077d49983d33b909b17Dave Airlie if (array_params_mask & (1 << i)) { 22619decd8ce43a7fad9b6a276d47296336f062ca23Dave Airlie LLVMValueRef P = LLVMGetParam(main_function, i); 22719decd8ce43a7fad9b6a276d47296336f062ca23Dave Airlie ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL); 228f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ac_add_attr_dereferenceable(P, UINT64_MAX); 229f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2302fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie else { 23119decd8ce43a7fad9b6a276d47296336f062ca23Dave Airlie ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG); 2322fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie } 233f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 234f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 235f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (unsafe_math) { 236f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* These were copied from some LLVM test. */ 237f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddTargetDependentFunctionAttr(main_function, 238f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "less-precise-fpmad", 239f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "true"); 240f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddTargetDependentFunctionAttr(main_function, 241f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "no-infs-fp-math", 242f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "true"); 243f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddTargetDependentFunctionAttr(main_function, 244f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "no-nans-fp-math", 245f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "true"); 246f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddTargetDependentFunctionAttr(main_function, 247f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "unsafe-fp-math", 248f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "true"); 249f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 250f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return main_function; 251f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 252f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 253f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) 254f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 255f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMPointerType(LLVMArrayType(elem_type, num_elements), 256f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie CONST_ADDR_SPACE); 257f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 258f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 259f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, 260f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx, 261f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type) 262f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 263f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset; 264f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ptr; 265f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int addr_space; 266f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 26713afc45f8fe61467e4e97fd3164308562c7c721fAlex Smith offset = LLVMConstInt(ctx->i32, idx * 16, false); 268f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 269f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr = ctx->shared_memory; 270f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); 271f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); 272f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); 273f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ptr; 274f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 275f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 276f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v) 277f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 278f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = LLVMTypeOf(v); 279f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (type == ctx->f32) { 280f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildBitCast(ctx->builder, v, ctx->i32, ""); 281f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 282f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef elem_type = LLVMGetElementType(type); 283f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (elem_type == ctx->f32) { 284f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type)); 285f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildBitCast(ctx->builder, v, nt, ""); 286f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 287f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 288f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return v; 289f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 290f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 291f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v) 292f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 293f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = LLVMTypeOf(v); 294f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (type == ctx->i32) { 295f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildBitCast(ctx->builder, v, ctx->f32, ""); 296f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 297f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef elem_type = LLVMGetElementType(type); 298f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (elem_type == ctx->i32) { 299f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type)); 300f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildBitCast(ctx->builder, v, nt, ""); 301f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 302f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 303f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return v; 304f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 305f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 306220912e21432dba996cb2694dea480c65cf6a7c6Dave Airliestatic LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx, 307220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie LLVMValueRef param, unsigned rshift, 308220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie unsigned bitwidth) 309220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie{ 310220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie LLVMValueRef value = param; 311220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie if (rshift) 312220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie value = LLVMBuildLShr(ctx->builder, value, 313220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie LLVMConstInt(ctx->i32, rshift, false), ""); 314220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie 315220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie if (rshift + bitwidth < 32) { 316220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie unsigned mask = (1 << bitwidth) - 1; 317220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie value = LLVMBuildAnd(ctx->builder, value, 318220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie LLVMConstInt(ctx->i32, mask, false), ""); 319220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie } 320220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie return value; 321220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie} 322220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie 3235697cfb7ec08e827a48adc2cd34364696e209147Dave Airliestatic LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx, 3245697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMValueRef base_ptr, LLVMValueRef index) 3255697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie{ 3265697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMValueRef indices[2] = { 3275697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie ctx->i32zero, 3285697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie index, 3295697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie }; 3305697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie return LLVMBuildGEP(ctx->builder, base_ptr, 3315697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie indices, 2, ""); 3325697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie} 3335697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie 334f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx, 335f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef base_ptr, LLVMValueRef index, 336f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool uniform) 337f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 338f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef pointer; 3395697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie pointer = build_gep0(ctx, base_ptr, index); 340f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (uniform) 341f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); 342f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildLoad(ctx->builder, pointer, ""); 343f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 344f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 345f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx, 346f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef base_ptr, LLVMValueRef index) 347f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 348f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true); 349f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); 350f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 351f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 352f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 353ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airliestatic void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs) 354ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie{ 355ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->sgpr_idx = sgpr_idx; 356ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->num_sgprs = num_sgprs; 357ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->indirect = false; 358ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->indirect_offset = 0; 359ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie} 360ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie 361ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airliestatic void set_userdata_location_shader(struct nir_to_llvm_context *ctx, 362ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie int idx, uint8_t sgpr_idx, uint8_t num_sgprs) 363ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie{ 364ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs); 365ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie} 366ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie 367ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie#if 0 368ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airliestatic void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs, 369ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie uint32_t indirect_offset) 370ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie{ 371ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->sgpr_idx = sgpr_idx; 372ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->num_sgprs = num_sgprs; 373ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->indirect = true; 374ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie ud_info->indirect_offset = indirect_offset; 375ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie} 376ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie#endif 377ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie 378c46c376977275a3327c42ad30ec4df4cb7a4b060Dave Airliestatic void create_function(struct nir_to_llvm_context *ctx) 379f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 380f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef arg_types[23]; 381f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned arg_idx = 0; 3820fafe94a3904f3e20a566077d49983d33b909b17Dave Airlie unsigned array_params_mask = 0; 383f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned sgpr_count = 0, user_sgpr_count; 384f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned i; 385dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0; 386dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie unsigned user_sgpr_idx; 387c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie bool need_push_constants; 388c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie 389c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie need_push_constants = true; 390c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie if (!ctx->options->layout) 391c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie need_push_constants = false; 392c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie else if (!ctx->options->layout->push_constant_size && 393c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie !ctx->options->layout->dynamic_offset_count) 394c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie need_push_constants = false; 395b0e11a153c4b8cd9bf29bdb8e26a776de241a5b4Dave Airlie 396b0e11a153c4b8cd9bf29bdb8e26a776de241a5b4Dave Airlie /* 1 for each descriptor set */ 397dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie for (unsigned i = 0; i < num_sets; ++i) { 398dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { 3990fafe94a3904f3e20a566077d49983d33b909b17Dave Airlie array_params_mask |= (1 << arg_idx); 400dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024); 401dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie } 402dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie } 403f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 404c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie if (need_push_constants) { 405c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie /* 1 for push constants and dynamic descriptors */ 4060fafe94a3904f3e20a566077d49983d33b909b17Dave Airlie array_params_mask |= (1 << arg_idx); 407c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024); 408c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie } 409f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 410c46c376977275a3327c42ad30ec4df4cb7a4b060Dave Airlie switch (ctx->stage) { 411f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_COMPUTE: 412f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */ 413f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie user_sgpr_count = arg_idx; 414f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); 415f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; 416f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie sgpr_count = arg_idx; 417f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 418f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); 419f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 420f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_VERTEX: 421b0e11a153c4b8cd9bf29bdb8e26a776de241a5b4Dave Airlie arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */ 422f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; // base vertex 423f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; // start instance 424f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie user_sgpr_count = sgpr_count = arg_idx; 425f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; // vertex id 426f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; // rel auto id 427f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; // vs prim id 428f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; // instance id 429f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 430f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_FRAGMENT: 431b0e11a153c4b8cd9bf29bdb8e26a776de241a5b4Dave Airlie arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */ 432f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie user_sgpr_count = arg_idx; 433f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; /* prim mask */ 434f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie sgpr_count = arg_idx; 435f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v2i32; /* persp sample */ 436f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v2i32; /* persp center */ 437f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */ 438f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */ 439f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v2i32; /* linear sample */ 440f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v2i32; /* linear center */ 441f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */ 442f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->f32; /* line stipple tex */ 443f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->f32; /* pos x float */ 444f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->f32; /* pos y float */ 445f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->f32; /* pos z float */ 446f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->f32; /* pos w float */ 447f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; /* front face */ 448f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; /* ancillary */ 449f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->f32; /* sample coverage */ 450f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_types[arg_idx++] = ctx->i32; /* fixed pt */ 451f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 452f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 453f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unreachable("Shader stage not implemented"); 454f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 455f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 456f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->main_function = create_llvm_function( 457f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types, 4580fafe94a3904f3e20a566077d49983d33b909b17Dave Airlie arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math); 459c46c376977275a3327c42ad30ec4df4cb7a4b060Dave Airlie set_llvm_calling_convention(ctx->main_function, ctx->stage); 460f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 461f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 462f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->num_input_sgprs = 0; 463f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->num_input_vgprs = 0; 464f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 465f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < user_sgpr_count; i++) 466f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4; 467f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 468f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs; 469f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (; i < sgpr_count; i++) 470f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4; 471f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 472c46c376977275a3327c42ad30ec4df4cb7a4b060Dave Airlie if (ctx->stage != MESA_SHADER_FRAGMENT) 473f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (; i < arg_idx; ++i) 474f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4; 475f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 476f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_idx = 0; 477dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie user_sgpr_idx = 0; 478dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie for (unsigned i = 0; i < num_sets; ++i) { 479dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { 480dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2); 481dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie user_sgpr_idx += 2; 482dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie ctx->descriptor_sets[i] = 483dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie LLVMGetParam(ctx->main_function, arg_idx++); 484dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie } else 485dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie ctx->descriptor_sets[i] = NULL; 486ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie } 487f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 488c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie if (need_push_constants) { 489c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++); 490c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); 491c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie user_sgpr_idx += 2; 492c7dc1b010ae581f532240b661cb3d1c82e117e7eDave Airlie } 493f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 494c46c376977275a3327c42ad30ec4df4cb7a4b060Dave Airlie switch (ctx->stage) { 495f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_COMPUTE: 496dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3); 497dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie user_sgpr_idx += 3; 498f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->num_work_groups = 499f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMGetParam(ctx->main_function, arg_idx++); 500f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->workgroup_ids = 501f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMGetParam(ctx->main_function, arg_idx++); 502f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->tg_size = 503f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMGetParam(ctx->main_function, arg_idx++); 504f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->local_invocation_ids = 505f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMGetParam(ctx->main_function, arg_idx++); 506f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 507f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_VERTEX: 508dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2); 509dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie user_sgpr_idx += 2; 510f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++); 511dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2); 512dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie user_sgpr_idx += 2; 513f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++); 514f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++); 515f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++); 516f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++); 517f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++); 518f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++); 519f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 520f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_FRAGMENT: 521dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2); 522dfef9c7c1fcf0070784d1a19386d885bb1b3f511Dave Airlie user_sgpr_idx += 2; 523f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++); 524f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++); 525f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++); 526f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++); 527f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++); 528f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_idx++; 529f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++); 530f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++); 531f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++); 532f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie arg_idx++; /* line stipple */ 533f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++); 534f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++); 535f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++); 536f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++); 537f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++); 538f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++); 539f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 540f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 541f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unreachable("Shader stage not implemented"); 542f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 543f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 544f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 545f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void setup_types(struct nir_to_llvm_context *ctx) 546f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 547f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[4]; 548f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 549f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->voidt = LLVMVoidTypeInContext(ctx->context); 550f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i1 = LLVMIntTypeInContext(ctx->context, 1); 551f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i8 = LLVMIntTypeInContext(ctx->context, 8); 552f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); 553f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); 554f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); 555f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v2i32 = LLVMVectorType(ctx->i32, 2); 556f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v3i32 = LLVMVectorType(ctx->i32, 3); 557f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v4i32 = LLVMVectorType(ctx->i32, 4); 558f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v8i32 = LLVMVectorType(ctx->i32, 8); 559f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32 = LLVMFloatTypeInContext(ctx->context); 560f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f16 = LLVMHalfTypeInContext(ctx->context); 561f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v2f32 = LLVMVectorType(ctx->f32, 2); 562f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v4f32 = LLVMVectorType(ctx->f32, 4); 563f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v16i8 = LLVMVectorType(ctx->i8, 16); 564f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 565f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32zero = LLVMConstInt(ctx->i32, 0, false); 566f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one = LLVMConstInt(ctx->i32, 1, false); 567f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32zero = LLVMConstReal(ctx->f32, 0.0); 568f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32one = LLVMConstReal(ctx->f32, 1.0); 569f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 570f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = ctx->f32zero; 571f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = ctx->f32zero; 572f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = ctx->f32zero; 573f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[3] = ctx->f32one; 574f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v4f32empty = LLVMConstVector(args, 4); 575f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 576f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, 577f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "range", 5); 578f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, 579f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "invariant.load", 14); 580f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->uniform_md_kind = 581f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14); 582f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); 583f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 584f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstReal(ctx->f32, 2.5); 585f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 586f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 587f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic int get_llvm_num_components(LLVMValueRef value) 588f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 589f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = LLVMTypeOf(value); 590f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind 591f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ? LLVMGetVectorSize(type) 592f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie : 1; 593f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return num_components; 594f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 595f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 596f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx, 597f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef value, 598f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int index) 599f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 600f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int count = get_llvm_num_components(value); 601f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 602f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(index < count); 603f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == 1) 604f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return value; 605f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 606f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildExtractElement(ctx->builder, value, 607f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, index, false), ""); 608f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 609f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 610f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx, 611f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef value, unsigned count) 612f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 613f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_components = get_llvm_num_components(value); 614f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == num_components) 615f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return value; 616f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 617f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef masks[] = { 618f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), 619f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)}; 620f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 621f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == 1) 622f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildExtractElement(ctx->builder, value, masks[0], 623f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ""); 624f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 625f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef swizzle = LLVMConstVector(masks, count); 626f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); 627f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 628f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 629f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 630f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliebuild_store_values_extended(struct nir_to_llvm_context *ctx, 631f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *values, 632f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned value_count, 633f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned value_stride, 634f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef vec) 635f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 636f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuilderRef builder = ctx->builder; 637f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned i; 638f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 639f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (value_count == 1) { 640f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildStore(builder, vec, values[0]); 641f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return; 642f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 643f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 644f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < value_count; i++) { 645f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ptr = values[i * value_stride]; 646f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); 647f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); 648f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildStore(builder, value, ptr); 649f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 650f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 651f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 652f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx, 653f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_ssa_def *def) 654f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 655f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size); 656f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (def->num_components > 1) { 657f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = LLVMVectorType(type, def->num_components); 658f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 659f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return type; 660f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 661f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 662f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src) 663f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 664f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(src.is_ssa); 665f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa); 666f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return (LLVMValueRef)entry->data; 667f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 668f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 669f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 670f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx, 671f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_block *b) 672f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 673f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b); 674f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return (LLVMBasicBlockRef)entry->data; 675f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 676f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 677f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, 678f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_alu_src src, 679f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_components) 680f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 681f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef value = get_src(ctx, src.src); 682f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool need_swizzle = false; 683f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 684f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(value); 685f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = LLVMTypeOf(value); 686f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind 687f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ? LLVMGetVectorSize(type) 688f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie : 1; 689f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 690f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < num_components; ++i) { 691f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(src.swizzle[i] < src_components); 692f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (src.swizzle[i] != i) 693f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie need_swizzle = true; 694f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 695f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 696f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (need_swizzle || num_components != src_components) { 697f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef masks[] = { 698f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, src.swizzle[0], false), 699f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, src.swizzle[1], false), 700f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, src.swizzle[2], false), 701f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, src.swizzle[3], false)}; 702f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 703f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (src_components > 1 && num_components == 1) { 704f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = LLVMBuildExtractElement(ctx->builder, value, 705f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie masks[0], ""); 706f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (src_components == 1 && num_components > 1) { 707f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef values[] = {value, value, value, value}; 70838c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle value = ac_build_gather_values(&ctx->ac, values, num_components); 709f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 710f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef swizzle = LLVMConstVector(masks, num_components); 711f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = LLVMBuildShuffleVector(ctx->builder, value, value, 712f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie swizzle, ""); 713f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 714f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 715f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(!src.negate); 716f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(!src.abs); 717f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return value; 718f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 719f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 720f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx, 721f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMIntPredicate pred, LLVMValueRef src0, 722f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src1) 723f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 724f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); 725f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildSelect(ctx->builder, result, 726f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), 727f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), ""); 728f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 729f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 730f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx, 731f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMRealPredicate pred, LLVMValueRef src0, 732f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src1) 733f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 734f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result; 735f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0 = to_float(ctx, src0); 736f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src1 = to_float(ctx, src1); 737f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, ""); 738f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildSelect(ctx->builder, result, 739f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), 740f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), ""); 741f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 742f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 743f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx, 744f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intrin, 745f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 746f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 747f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[] = { 748f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_float(ctx, src0), 749f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 7502c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE); 751f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 752f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 753f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx, 754f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intrin, 755f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1) 756f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 757f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[] = { 758f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_float(ctx, src0), 759f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_float(ctx, src1), 760f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 7612c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE); 762f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 763f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 764f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx, 765f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intrin, 766f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) 767f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 768f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[] = { 769f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_float(ctx, src0), 770f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_float(ctx, src1), 771f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_float(ctx, src2), 772f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 7732c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE); 774f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 775f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 776f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx, 777f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) 778f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 779f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, 780f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32zero, ""); 781f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildSelect(ctx->builder, v, src1, src2, ""); 782f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 783f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 784f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx, 785f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 786f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 787f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[2] = { 788f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0, 789f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 790f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* The value of 1 means that ffs(x=0) = undef, so LLVM won't 791f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * add special code to check for x=0. The reason is that 792f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the LLVM behavior for x=0 is different from what we 793f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * need here. 794f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 795f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * The hardware already implements the correct behavior. 796f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 797f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 1, false), 798f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 7992c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); 800f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 801f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 802f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx, 803f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 804f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 8052c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32", 806f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32, &src0, 1, 8072fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 808f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 809f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* The HW returns the last bit index from MSB, but NIR wants 810f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the index from LSB. Invert it by doing "31 - msb". */ 811f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), 812f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie msb, ""); 813f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 814f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); 815f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef cond = LLVMBuildOr(ctx->builder, 816f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildICmp(ctx->builder, LLVMIntEQ, 817f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0, ctx->i32zero, ""), 818f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildICmp(ctx->builder, LLVMIntEQ, 819f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0, all_ones, ""), ""); 820f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 821f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); 822f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 823f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 824f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx, 825f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 826f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 827f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[2] = { 828f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0, 829f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one, 830f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 8312c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32", 832f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32, args, ARRAY_SIZE(args), 8332fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 834f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 835f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* The HW returns the last bit index from MSB, but NIR wants 836f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the index from LSB. Invert it by doing "31 - msb". */ 837f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), 838f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie msb, ""); 839f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 840f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildSelect(ctx->builder, 841f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, 842f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32zero, ""), 843f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, -1, true), msb, ""); 844f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 845f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 846f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx, 847f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMIntPredicate pred, 848f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1) 849f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 850f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildSelect(ctx->builder, 851f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildICmp(ctx->builder, pred, src0, src1, ""), 852f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0, 853f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src1, ""); 854f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 855f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 856f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx, 857f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 858f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 859f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return emit_minmax_int(ctx, LLVMIntSGT, src0, 860f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildNeg(ctx->builder, src0, "")); 861f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 862f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 863f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx, 864f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 865f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 866f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef cmp, val; 867f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 868f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, ""); 869f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, ""); 870f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, ""); 871f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), ""); 872f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return val; 873f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 874f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 875f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx, 876f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 877f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 878f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef cmp, val; 879f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 880f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, ""); 881f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, ""); 882f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, ""); 883f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), ""); 884f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return val; 885f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 886f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 887f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx, 888f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 889f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 890f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intr = "llvm.floor.f32"; 891f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef fsrc0 = to_float(ctx, src0); 892f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[] = { 893f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fsrc0, 894f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 8952c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr, 896f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32, params, 1, 8972fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 898f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildFSub(ctx->builder, fsrc0, floor, ""); 899f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 900f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 901f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx, 902f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intrin, 903f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1) 904f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 905f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef ret_type; 906f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef types[] = { ctx->i32, ctx->i1 }; 907f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res; 908f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[] = { src0, src1 }; 909f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ret_type = LLVMStructTypeInContext(ctx->context, types, 910f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2, true); 911f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 9122c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type, 9132fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie params, 2, AC_FUNC_ATTR_READNONE); 914f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 915f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); 916f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); 917f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return res; 918f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 919f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 920f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx, 921f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 922f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 923f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); 924f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 925f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 926f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx, 927f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1) 928f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 929f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef dst64, result; 930f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, ""); 931f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, ""); 932f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 933f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); 934f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); 935f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); 936f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 937f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 938f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 939f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx, 940f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1) 941f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 942f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef dst64, result; 943f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); 944f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); 945f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 946f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); 947f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); 948f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); 949f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 950f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 951f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 952f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx, 953f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intrin, 954f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef srcs[3]) 955f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 956f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result; 957f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); 9582c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE); 959f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 960f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); 961f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 962f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 963f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 964f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx, 965f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1, 966f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src2, LLVMValueRef src3) 967f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 968f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef bfi_args[3], result; 969f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 970f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bfi_args[0] = LLVMBuildShl(ctx->builder, 971f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildSub(ctx->builder, 972f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildShl(ctx->builder, 973f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one, 974f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src3, ""), 975f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one, ""), 976f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src2, ""); 977f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); 978f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bfi_args[2] = src0; 979f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 980f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), ""); 981f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 982f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Calculate: 983f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) 984f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Use the right-hand side, which the LLVM backend can convert to V_BFI. 985f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 986f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildXor(ctx->builder, bfi_args[2], 987f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildAnd(ctx->builder, bfi_args[0], 988f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), ""); 989f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 990f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildSelect(ctx->builder, icond, src1, result, ""); 991f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 992f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 993f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 994f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx, 995f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 996f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 997f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); 998f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int i; 999f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef comp[2]; 1000f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1001f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0 = to_float(ctx, src0); 1002f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""); 1003f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""); 1004f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < 2; i++) { 1005f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, ""); 1006f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, ""); 1007f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, ""); 1008f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1009f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1010f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, ""); 1011f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], ""); 1012f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1013f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return comp[0]; 1014f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1015f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1016f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx, 1017f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 1018f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1019f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); 1020f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef temps[2], result, val; 1021f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int i; 1022f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1023f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < 2; i++) { 1024f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; 1025f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); 1026f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); 1027f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); 1028f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1029f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1030f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], 1031f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32zero, ""); 1032f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildInsertElement(ctx->builder, result, temps[1], 1033f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one, ""); 1034f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 1035f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1036f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1037f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/** 1038f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Set range metadata on an instruction. This can only be used on load and 1039f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * call instructions. If you know an instruction can only produce the values 1040f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 0, 1, 2, you would do set_range_metadata(value, 0, 3); 1041f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * \p lo is the minimum value inclusive. 1042f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * \p hi is the maximum value exclusive. 1043f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 1044f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void set_range_metadata(struct nir_to_llvm_context *ctx, 1045f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef value, unsigned lo, unsigned hi) 1046f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1047f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef range_md, md_args[2]; 1048f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = LLVMTypeOf(value); 1049f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMContextRef context = LLVMGetTypeContext(type); 1050f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1051f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie md_args[0] = LLVMConstInt(type, lo, false); 1052f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie md_args[1] = LLVMConstInt(type, hi, false); 1053f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie range_md = LLVMMDNodeInContext(context, md_args, 2); 1054f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetMetadata(value, ctx->range_md_kind, range_md); 1055f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1056f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1057f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx) 1058f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1059f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tid; 1060f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tid_args[2]; 1061f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); 1062f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tid_args[1] = ctx->i32zero; 10632c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac, 1064f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "llvm.amdgcn.mbcnt.lo", ctx->i32, 10652fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie tid_args, 2, AC_FUNC_ATTR_READNONE); 1066f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 10672c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle tid = ac_emit_llvm_intrinsic(&ctx->ac, 1068f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "llvm.amdgcn.mbcnt.hi", ctx->i32, 10692fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie tid_args, 2, AC_FUNC_ATTR_READNONE); 1070f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie set_range_metadata(ctx, tid, 0, 64); 1071f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return tid; 1072f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1073f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1074f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/* 1075f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * SI implements derivatives using the local data store (LDS) 1076f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * All writes to the LDS happen in all executing threads at 1077f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the same time. TID is the Thread ID for the current 1078f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * thread and is a value between 0 and 63, representing 1079f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the thread's position in the wavefront. 1080f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 1081f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * For the pixel shader threads are grouped into quads of four pixels. 1082f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * The TIDs of the pixels of a quad are: 1083f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 1084f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * +------+------+ 1085f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * |4n + 0|4n + 1| 1086f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * +------+------+ 1087f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * |4n + 2|4n + 3| 1088f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * +------+------+ 1089f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 1090f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * So, masking the TID with 0xfffffffc yields the TID of the top left pixel 1091f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * of the quad, masking with 0xfffffffd yields the TID of the top pixel of 1092f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * the current pixel's column, and masking with 0xfffffffe yields the TID 1093f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * of the left pixel of the current pixel's row. 1094f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 1095f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Adding 1 yields the TID of the pixel to the right of the left pixel, and 1096f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * adding 2 yields the TID of the pixel below the top pixel. 1097f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 1098f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/* masks for thread ID. */ 1099f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define TID_MASK_TOP_LEFT 0xfffffffc 1100f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define TID_MASK_TOP 0xfffffffd 1101f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define TID_MASK_LEFT 0xfffffffe 1102f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx, 11035697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie nir_op op, 1104f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0) 1105f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1106f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tl, trbl, result; 1107f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tl_tid, trbl_tid; 1108f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[2]; 11095697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMValueRef thread_id; 1110f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned mask; 1111f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx; 1112f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->has_ddxy = true; 11135697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie 11145697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie if (!ctx->lds && !ctx->has_ds_bpermute) 1115f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module, 1116f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMArrayType(ctx->i32, 64), 1117f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "ddxy_lds", LOCAL_ADDR_SPACE); 1118f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 11195697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie thread_id = get_thread_id(ctx); 11205697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie if (op == nir_op_fddx_fine || op == nir_op_fddx) 1121f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask = TID_MASK_LEFT; 11225697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie else if (op == nir_op_fddy_fine || op == nir_op_fddy) 1123f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask = TID_MASK_TOP; 1124f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 1125f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask = TID_MASK_TOP_LEFT; 1126f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 11275697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie tl_tid = LLVMBuildAnd(ctx->builder, thread_id, 1128f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, mask, false), ""); 1129f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* for DDX we want to next X pixel, DDY next Y pixel. */ 11305697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie if (op == nir_op_fddx_fine || 11315697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie op == nir_op_fddx_coarse || 11325697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie op == nir_op_fddx) 1133f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie idx = 1; 1134f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 1135f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie idx = 2; 1136f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 11375697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, 1138f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, idx, false), ""); 1139f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 11405697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie if (ctx->has_ds_bpermute) { 1141f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMBuildMul(ctx->builder, tl_tid, 1142f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 4, false), ""); 1143f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = src0; 11442c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute", 1145f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32, args, 2, 11462fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 1147f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1148f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMBuildMul(ctx->builder, trbl_tid, 1149f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 4, false), ""); 11502c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute", 1151f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32, args, 2, 11522fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 1153f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 11545697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMValueRef store_ptr, load_ptr0, load_ptr1; 11555697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie 11565697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie store_ptr = build_gep0(ctx, ctx->lds, thread_id); 11575697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid); 11585697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid); 1159f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 11605697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMBuildStore(ctx->builder, src0, store_ptr); 1161f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); 1162f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); 1163f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1164f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); 1165f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); 1166f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); 1167f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 1168f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1169f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1170f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/* 1171f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * this takes an I,J coordinate pair, 1172f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * and works out the X and Y derivatives. 1173f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * it returns DDX(I), DDX(J), DDY(I), DDY(J). 1174f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 1175f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_ddxy_interp( 1176f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_to_llvm_context *ctx, 1177f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef interp_ij) 1178f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 11795697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMValueRef result[4], a; 11805697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie unsigned i; 1181f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 11825697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie for (i = 0; i < 2; i++) { 11835697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie a = LLVMBuildExtractElement(ctx->builder, interp_ij, 11845697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie LLVMConstInt(ctx->i32, i, false), ""); 11855697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie result[i] = emit_ddxy(ctx, nir_op_fddx, a); 11865697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie result[2+i] = emit_ddxy(ctx, nir_op_fddy, a); 1187f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 118838c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return ac_build_gather_values(&ctx->ac, result, 4); 1189f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1190f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1191f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) 1192f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1193f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src[4], result = NULL; 1194f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_components = instr->dest.dest.ssa.num_components; 1195f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned src_components; 1196f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1197f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src)); 1198f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->op) { 1199f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_vec2: 1200f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_vec3: 1201f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_vec4: 1202f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_components = 1; 1203f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1204f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_pack_half_2x16: 1205f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_components = 2; 1206f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1207f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_unpack_half_2x16: 1208f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_components = 1; 1209f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1210f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 1211f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_components = num_components; 1212f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1213f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1214f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) 1215f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[i] = get_alu_src(ctx, instr->src[i], src_components); 1216f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1217f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->op) { 1218f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fmov: 1219f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_imov: 1220f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = src[0]; 1221f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1222f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fneg: 1223f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1224f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFNeg(ctx->builder, src[0], ""); 1225f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1226f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ineg: 1227f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildNeg(ctx->builder, src[0], ""); 1228f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1229f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_inot: 1230f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildNot(ctx->builder, src[0], ""); 1231f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1232f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_iadd: 1233f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildAdd(ctx->builder, src[0], src[1], ""); 1234f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1235f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fadd: 1236f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1237f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[1] = to_float(ctx, src[1]); 1238f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFAdd(ctx->builder, src[0], src[1], ""); 1239f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1240f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fsub: 1241f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1242f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[1] = to_float(ctx, src[1]); 1243f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFSub(ctx->builder, src[0], src[1], ""); 1244f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1245f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_isub: 1246f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildSub(ctx->builder, src[0], src[1], ""); 1247f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1248f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_imul: 1249f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildMul(ctx->builder, src[0], src[1], ""); 1250f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1251f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_imod: 1252f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildSRem(ctx->builder, src[0], src[1], ""); 1253f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1254f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_umod: 1255f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildURem(ctx->builder, src[0], src[1], ""); 1256f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1257f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fmod: 1258f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1259f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[1] = to_float(ctx, src[1]); 12601007047ca1086eef89f65c87e7ea2371c4f80513Nicolai Hähnle result = ac_emit_fdiv(&ctx->ac, src[0], src[1]); 1261f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result); 1262f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFMul(ctx->builder, src[1] , result, ""); 1263f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFSub(ctx->builder, src[0], result, ""); 1264f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1265f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_frem: 1266f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1267f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[1] = to_float(ctx, src[1]); 1268f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFRem(ctx->builder, src[0], src[1], ""); 1269f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1270bbb4562def6e91e69b1613b1c8d72d4a125c7d45Dave Airlie case nir_op_irem: 1271bbb4562def6e91e69b1613b1c8d72d4a125c7d45Dave Airlie result = LLVMBuildSRem(ctx->builder, src[0], src[1], ""); 1272bbb4562def6e91e69b1613b1c8d72d4a125c7d45Dave Airlie break; 1273f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_idiv: 1274f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildSDiv(ctx->builder, src[0], src[1], ""); 1275f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1276f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_udiv: 1277f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildUDiv(ctx->builder, src[0], src[1], ""); 1278f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1279f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fmul: 1280f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1281f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[1] = to_float(ctx, src[1]); 1282f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFMul(ctx->builder, src[0], src[1], ""); 1283f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1284f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fdiv: 1285f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1286f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[1] = to_float(ctx, src[1]); 12871007047ca1086eef89f65c87e7ea2371c4f80513Nicolai Hähnle result = ac_emit_fdiv(&ctx->ac, src[0], src[1]); 1288f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1289f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_frcp: 1290f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 12911007047ca1086eef89f65c87e7ea2371c4f80513Nicolai Hähnle result = ac_emit_fdiv(&ctx->ac, ctx->f32one, src[0]); 1292f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1293f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_iand: 1294f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildAnd(ctx->builder, src[0], src[1], ""); 1295f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1296f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ior: 1297f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildOr(ctx->builder, src[0], src[1], ""); 1298f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1299f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ixor: 1300f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildXor(ctx->builder, src[0], src[1], ""); 1301f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1302f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ishl: 1303f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildShl(ctx->builder, src[0], src[1], ""); 1304f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1305f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ishr: 1306f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildAShr(ctx->builder, src[0], src[1], ""); 1307f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1308f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ushr: 1309f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildLShr(ctx->builder, src[0], src[1], ""); 1310f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1311f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ilt: 1312f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]); 1313f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1314f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ine: 1315f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]); 1316f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1317f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ieq: 1318f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]); 1319f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1320f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ige: 1321f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]); 1322f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1323f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ult: 1324f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]); 1325f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1326f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_uge: 1327f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]); 1328f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1329f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_feq: 1330f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]); 1331f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1332f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fne: 1333f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]); 1334f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1335f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_flt: 1336f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]); 1337f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1338f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fge: 1339f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]); 1340f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1341f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fabs: 1342f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]); 1343f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1344f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_iabs: 1345f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_iabs(ctx, src[0]); 1346f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1347f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_imax: 1348f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]); 1349f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1350f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_imin: 1351f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]); 1352f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1353f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_umax: 1354f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]); 1355f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1356f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_umin: 1357f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]); 1358f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1359f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_isign: 1360f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_isign(ctx, src[0]); 1361f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1362f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fsign: 1363f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1364f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_fsign(ctx, src[0]); 1365f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1366f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ffloor: 1367f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]); 1368f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1369f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ftrunc: 1370f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]); 1371f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1372f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fceil: 1373f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]); 1374f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1375f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fround_even: 1376f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]); 1377f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1378f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ffract: 1379f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_ffract(ctx, src[0]); 1380f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1381f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fsin: 1382f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]); 1383f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1384f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fcos: 1385f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]); 1386f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1387f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fsqrt: 1388f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]); 1389f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1390f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fexp2: 1391f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]); 1392f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1393f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_flog2: 1394f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]); 1395f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1396f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_frsq: 1397f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]); 13981007047ca1086eef89f65c87e7ea2371c4f80513Nicolai Hähnle result = ac_emit_fdiv(&ctx->ac, ctx->f32one, result); 1399f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1400f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fpow: 1401f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]); 1402f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1403f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fmax: 1404f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]); 1405f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1406f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fmin: 1407f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]); 1408f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1409f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ffma: 1410f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]); 1411f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1412f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ibitfield_extract: 1413f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src); 1414f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1415f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ubitfield_extract: 1416f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src); 1417f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1418f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_bitfield_insert: 1419f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]); 1420f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1421f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_bitfield_reverse: 14222c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); 1423f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1424f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_bit_count: 14252c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); 1426f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1427f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_vec2: 1428f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_vec3: 1429f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_vec4: 1430f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) 1431f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[i] = to_integer(ctx, src[i]); 143238c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle result = ac_build_gather_values(&ctx->ac, src, num_components); 1433f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1434f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_f2i: 1435f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1436f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, ""); 1437f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1438f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_f2u: 1439f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1440f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, ""); 1441f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1442f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_i2f: 1443f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, ""); 1444f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1445f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_u2f: 1446f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, ""); 1447f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1448f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_bcsel: 1449f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_bcsel(ctx, src[0], src[1], src[2]); 1450f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1451f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_find_lsb: 1452f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_find_lsb(ctx, src[0]); 1453f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1454f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ufind_msb: 1455f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_ufind_msb(ctx, src[0]); 1456f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1457f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_ifind_msb: 1458f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_ifind_msb(ctx, src[0]); 1459f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1460f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_uadd_carry: 1461f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]); 1462f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1463f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_usub_borrow: 1464f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]); 1465f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1466f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_b2f: 1467f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_b2f(ctx, src[0]); 1468f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1469f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fquantize2f16: 1470f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src[0] = to_float(ctx, src[0]); 1471f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, ""); 1472f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* need to convert back up to f32 */ 1473f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); 1474f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1475f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_umul_high: 1476f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_umul_high(ctx, src[0], src[1]); 1477f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1478f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_imul_high: 1479f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_imul_high(ctx, src[0], src[1]); 1480f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1481f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_pack_half_2x16: 1482f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_pack_half_2x16(ctx, src[0]); 1483f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1484f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_unpack_half_2x16: 1485f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = emit_unpack_half_2x16(ctx, src[0]); 1486f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1487f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fddx: 1488f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fddy: 1489f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fddx_fine: 1490f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fddy_fine: 1491f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fddx_coarse: 1492f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_op_fddy_coarse: 14935697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie result = emit_ddxy(ctx, instr->op, src[0]); 1494f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1495f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 1496f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "Unknown NIR alu instr: "); 1497f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_print_instr(&instr->instr, stderr); 1498f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "\n"); 1499f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 1500f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1501f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1502f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (result) { 1503f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(instr->dest.dest.is_ssa); 1504f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = to_integer(ctx, result); 1505f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa, 1506f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result); 1507f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1508f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1509f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1510f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_load_const(struct nir_to_llvm_context *ctx, 1511f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_load_const_instr *instr) 1512f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1513f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef values[4], value = NULL; 1514f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef element_type = 1515f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMIntTypeInContext(ctx->context, instr->def.bit_size); 1516f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1517f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < instr->def.num_components; ++i) { 1518f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->def.bit_size) { 1519f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case 32: 1520f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[i] = LLVMConstInt(element_type, 1521f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->value.u32[i], false); 1522f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1523f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case 64: 1524f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[i] = LLVMConstInt(element_type, 1525f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->value.u64[i], false); 1526f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1527f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 1528f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, 1529f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "unsupported nir load_const bit_size: %d\n", 1530f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->def.bit_size); 1531f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 1532f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1533f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1534f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->def.num_components > 1) { 1535f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = LLVMConstVector(values, instr->def.num_components); 1536f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 1537f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = values[0]; 1538f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1539f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, &instr->def, value); 1540f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1541f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1542f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr, 1543f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type) 1544f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1545f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); 1546f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildBitCast(ctx->builder, ptr, 1547f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPointerType(type, addr_space), ""); 1548f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1549f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1550f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef 1551f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieget_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements) 1552f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1553f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef size = 1554f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildExtractElement(ctx->builder, descriptor, 1555f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 2, false), ""); 1556f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1557f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* VI only */ 1558f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->options->chip_class >= VI && in_elements) { 1559f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* On VI, the descriptor contains the size in bytes, 1560f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * but TXQ must return the size in elements. 1561f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * The stride is always non-zero for resources using TXQ. 1562f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 1563f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef stride = 1564f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildExtractElement(ctx->builder, descriptor, 1565f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 1, false), ""); 1566f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride = LLVMBuildLShr(ctx->builder, stride, 1567f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 16, false), ""); 1568f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride = LLVMBuildAnd(ctx->builder, stride, 1569f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0x3fff, false), ""); 1570f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1571f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie size = LLVMBuildUDiv(ctx->builder, size, stride, ""); 1572f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1573f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return size; 1574f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1575f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1576f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/** 1577f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with 1578f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * intrinsic names). 1579f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 1580f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void build_int_type_name( 1581f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type, 1582f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie char *buf, unsigned bufsize) 1583f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1584f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(bufsize >= 6); 1585f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1586f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) 1587f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie snprintf(buf, bufsize, "v%ui32", 1588f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMGetVectorSize(type)); 1589f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 1590f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie strcpy(buf, "i32"); 1591f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1592f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1593f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, 1594f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_tex_info *tinfo, 1595f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_tex_instr *instr, 1596f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intr_name, 1597f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned coord_vgpr_index) 1598f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1599f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef coord = tinfo->args[0]; 1600f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef half_texel[2]; 1601f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int c; 1602f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1603f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie //TODO Rect 1604f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie { 1605f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef txq_args[10]; 1606f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int txq_arg_count = 0; 1607f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef size; 1608f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; 1609f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false); 1610f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = tinfo->args[1]; 1611f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */ 1612f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */ 1613f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */ 1614f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0); 1615f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */ 1616f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */ 1617f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ 1618f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ 16192c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, 1620f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txq_args, txq_arg_count, 16212fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 1622f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1623f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (c = 0; c < 2; c++) { 1624f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, 1625713522fb8d4366d29be18edc3d5f33faba1cb7c4Dave Airlie LLVMConstInt(ctx->i32, c, false), ""); 1626f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); 16271007047ca1086eef89f65c87e7ea2371c4f80513Nicolai Hähnle half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]); 1628f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], 1629f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstReal(ctx->f32, -0.5), ""); 1630f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1631f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1632f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1633f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (c = 0; c < 2; c++) { 1634f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tmp; 1635f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); 1636f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); 1637f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); 1638f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); 1639f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); 1640f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); 1641f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1642f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1643f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[0] = coord; 16442c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, 16452fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); 1646f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1647f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1648f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1649f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, 1650f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_tex_instr *instr, 1651f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_tex_info *tinfo) 1652f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1653f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *name = "llvm.SI.image.sample"; 1654f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *infix = ""; 1655f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie char intr_name[127]; 1656f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie char type[64]; 1657f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool is_shadow = instr->is_shadow; 1658f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool has_offset = tinfo->has_offset; 1659f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->op) { 1660f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_txf: 1661f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_txf_ms: 1662f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_samples_identical: 1663f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" : 1664f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" : 1665f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "llvm.SI.image.load.mip"; 1666f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie is_shadow = false; 1667f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie has_offset = false; 1668f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1669f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_txb: 1670f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie infix = ".b"; 1671f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1672f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_txl: 1673f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie infix = ".l"; 1674f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1675f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_txs: 1676f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.SI.getresinfo"; 1677f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1678f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_query_levels: 1679f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.SI.getresinfo"; 1680f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1681f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_tex: 1682f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage != MESA_SHADER_FRAGMENT) 1683f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie infix = ".lz"; 1684f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1685f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_txd: 1686f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie infix = ".d"; 1687f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1688f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_tg4: 1689f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.SI.gather4"; 1690f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie infix = ".lz"; 1691f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1692f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_texop_lod: 1693f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.SI.getlod"; 1694f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie is_shadow = false; 1695f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie has_offset = false; 1696f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1697f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 1698f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1699f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1700f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1701f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type)); 1702f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix, 1703f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie has_offset ? ".o" : "", type); 1704f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1705f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->op == nir_texop_tg4) { 1706f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); 1707f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { 1708f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name, 1709f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie (int)has_offset + (int)is_shadow); 1710f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1711f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 17122c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, 17132fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); 1714f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1715f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1716f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1717f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, 1718f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 1719f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1720f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef index = get_src(ctx, instr->src[0]); 1721f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned desc_set = nir_intrinsic_desc_set(instr); 1722f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned binding = nir_intrinsic_binding(instr); 1723f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; 1724823f6d48a12c748312e46db76f8f159e42bfe622Fredrik Höglund struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; 1725823f6d48a12c748312e46db76f8f159e42bfe622Fredrik Höglund struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; 1726f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned base_offset = layout->binding[binding].offset; 1727f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset, stride; 1728f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1729f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || 1730f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { 1731823f6d48a12c748312e46db76f8f159e42bfe622Fredrik Höglund unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + 1732823f6d48a12c748312e46db76f8f159e42bfe622Fredrik Höglund layout->binding[binding].dynamic_offset_offset; 1733f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie desc_ptr = ctx->push_constants; 1734823f6d48a12c748312e46db76f8f159e42bfe622Fredrik Höglund base_offset = pipeline_layout->push_constant_size + 16 * idx; 1735f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride = LLVMConstInt(ctx->i32, 16, false); 1736f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 1737f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false); 1738f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1739f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = LLVMConstInt(ctx->i32, base_offset, false); 1740f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie index = LLVMBuildMul(ctx->builder, index, stride, ""); 1741f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = LLVMBuildAdd(ctx->builder, offset, index, ""); 1742e54af02567c8482c3aae983bdb6b58abe41c0eb0Dave Airlie 1743e54af02567c8482c3aae983bdb6b58abe41c0eb0Dave Airlie desc_ptr = build_gep0(ctx, desc_ptr, offset); 1744f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32); 1745f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md); 1746f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1747f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildLoad(ctx->builder, desc_ptr, ""); 1748f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1749f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1750f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx, 1751f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 1752f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1753d7d772f9031ece78320f1765b811d5a1e588b311Bas Nieuwenhuizen LLVMValueRef ptr, addr; 1754d7d772f9031ece78320f1765b811d5a1e588b311Bas Nieuwenhuizen 1755d7d772f9031ece78320f1765b811d5a1e588b311Bas Nieuwenhuizen addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0); 1756d7d772f9031ece78320f1765b811d5a1e588b311Bas Nieuwenhuizen addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), ""); 1757f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1758d7d772f9031ece78320f1765b811d5a1e588b311Bas Nieuwenhuizen ptr = build_gep0(ctx, ctx->push_constants, addr); 1759f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa)); 1760f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1761f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildLoad(ctx->builder, ptr, ""); 1762f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1763f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1764f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx, 1765f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 1766f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1767f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef desc = get_src(ctx, instr->src[0]); 1768f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1769f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return get_buffer_size(ctx, desc, false); 1770f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1771f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_store_ssbo(struct nir_to_llvm_context *ctx, 1772f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 1773f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1774f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *store_name; 1775f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef data_type = ctx->f32; 1776f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned writemask = nir_intrinsic_write_mask(instr); 1777f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef base_data, base_offset; 1778f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[6]; 1779f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1780f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage == MESA_SHADER_FRAGMENT) 1781f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.writes_memory = true; 1782f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1783f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[1] = get_src(ctx, instr->src[1]); 1784f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ 1785f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */ 1786f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */ 1787f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1788f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->num_components > 1) 1789f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data_type = LLVMVectorType(ctx->f32, instr->num_components); 1790f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1791f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie base_data = to_float(ctx, get_src(ctx, instr->src[0])); 1792f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie base_data = trim_vector(ctx, base_data, instr->num_components); 1793f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie base_data = LLVMBuildBitCast(ctx->builder, base_data, 1794f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data_type, ""); 1795f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie base_offset = get_src(ctx, instr->src[2]); /* voffset */ 1796f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie while (writemask) { 1797f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int start, count; 1798f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef data; 1799f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset; 1800f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef tmp; 1801f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie u_bit_scan_consecutive_range(&writemask, &start, &count); 1802f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1803f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Due to an LLVM limitation, split 3-element writes 1804f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * into a 2-element and a 1-element write. */ 1805f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == 3) { 1806f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie writemask |= 1 << (start + 2); 1807f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie count = 2; 1808f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1809f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1810f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == 4) { 1811f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie store_name = "llvm.amdgcn.buffer.store.v4f32"; 1812f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data = base_data; 1813f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (count == 2) { 1814f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp = LLVMBuildExtractElement(ctx->builder, 1815f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie base_data, LLVMConstInt(ctx->i32, start, false), ""); 1816f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp, 1817f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32zero, ""); 1818f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1819f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp = LLVMBuildExtractElement(ctx->builder, 1820f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie base_data, LLVMConstInt(ctx->i32, start + 1, false), ""); 1821f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data = LLVMBuildInsertElement(ctx->builder, data, tmp, 1822f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one, ""); 1823f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie store_name = "llvm.amdgcn.buffer.store.v2f32"; 1824f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1825f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 1826f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(count == 1); 1827f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (get_llvm_num_components(base_data) > 1) 1828f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data = LLVMBuildExtractElement(ctx->builder, base_data, 1829f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, start, false), ""); 1830f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 1831f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data = base_data; 1832f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie store_name = "llvm.amdgcn.buffer.store.f32"; 1833f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1834f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1835f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = base_offset; 1836f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (start != 0) { 1837f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), ""); 1838f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1839f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[0] = data; 1840f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[3] = offset; 18412c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, store_name, 1842d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie ctx->voidt, params, 6, 0); 1843f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1844f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1845f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1846f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, 1847f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 1848f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1849f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *name; 1850abc887faa108b52f1e622ac8c1a3e30c4f317475Bas Nieuwenhuizen LLVMValueRef params[6]; 1851f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int arg_count = 0; 1852f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage == MESA_SHADER_FRAGMENT) 1853f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.writes_memory = true; 1854f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1855f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { 185692d7563fba04219a0a96a4949182b1f94b6d79b0Bas Nieuwenhuizen params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0); 1857f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 185892d7563fba04219a0a96a4949182b1f94b6d79b0Bas Nieuwenhuizen params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0); 1859f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[arg_count++] = get_src(ctx, instr->src[0]); 1860f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ 1861f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ 1862f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */ 1863f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1864f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->intrinsic) { 1865f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_add: 1866f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.add"; 1867f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1868f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_imin: 1869f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.smin"; 1870f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1871f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_umin: 1872f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.umin"; 1873f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1874f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_imax: 1875f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.smax"; 1876f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1877f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_umax: 1878f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.umax"; 1879f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1880f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_and: 1881f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.and"; 1882f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1883f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_or: 1884f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.or"; 1885f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1886f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_xor: 1887f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.xor"; 1888f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1889f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_exchange: 1890f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.swap"; 1891f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1892f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_comp_swap: 1893f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie name = "llvm.amdgcn.buffer.atomic.cmpswap"; 1894f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 1895f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 1896f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 1897f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 1898f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 18992c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0); 1900f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1901f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1902f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, 1903f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 1904f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1905f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *load_name; 1906f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef data_type = ctx->f32; 1907f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->num_components == 3) 1908f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data_type = LLVMVectorType(ctx->f32, 4); 1909f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (instr->num_components > 1) 1910f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie data_type = LLVMVectorType(ctx->f32, instr->num_components); 1911f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1912f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->num_components == 4 || instr->num_components == 3) 1913f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie load_name = "llvm.amdgcn.buffer.load.v4f32"; 1914f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (instr->num_components == 2) 1915f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie load_name = "llvm.amdgcn.buffer.load.v2f32"; 1916f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (instr->num_components == 1) 1917f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie load_name = "llvm.amdgcn.buffer.load.f32"; 1918f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 1919f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 1920f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1921f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[] = { 1922f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie get_src(ctx, instr->src[0]), 1923f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), 1924f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie get_src(ctx, instr->src[1]), 1925f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i1, 0, false), 1926f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i1, 0, false), 1927f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 1928f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1929f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ret = 19302c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); 1931f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1932f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->num_components == 3) 1933f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ret = trim_vector(ctx, ret, 3); 1934f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1935f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildBitCast(ctx->builder, ret, 1936f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie get_def_type(ctx, &instr->dest.ssa), ""); 1937f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 1938f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 193905533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizenstatic LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, 194005533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen nir_intrinsic_instr *instr) 194105533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen{ 194205533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen LLVMValueRef results[4], ret; 194305533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen LLVMValueRef rsrc = get_src(ctx, instr->src[0]); 194405533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen LLVMValueRef offset = get_src(ctx, instr->src[1]); 194505533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen 194605533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), ""); 194705533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen 194805533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen for (unsigned i = 0; i < instr->num_components; ++i) { 194905533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen LLVMValueRef params[] = { 195005533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen rsrc, 195105533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), 195205533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen offset, "") 195305533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen }; 19542c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, 195505533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen params, 2, AC_FUNC_ATTR_READNONE); 195605533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen } 195705533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen 195805533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen 195938c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle ret = ac_build_gather_values(&ctx->ac, results, instr->num_components); 196005533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen return LLVMBuildBitCast(ctx->builder, ret, 196105533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen get_def_type(ctx, &instr->dest.ssa), ""); 196205533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen} 196305533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen 1964f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 1965f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieradv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, 1966f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool vs_in, unsigned *const_out, LLVMValueRef *indir_out) 1967f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 1968f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned const_offset = 0; 1969f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset = NULL; 1970f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1971f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1972f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie while (tail->child != NULL) { 1973f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *parent_type = tail->type; 1974f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tail = tail->child; 1975f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1976f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (tail->deref_type == nir_deref_type_array) { 1977f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_deref_array *deref_array = nir_deref_as_array(tail); 1978f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef index, stride, local_offset; 1979f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned size = glsl_count_attribute_slots(tail->type, vs_in); 1980f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1981f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const_offset += size * deref_array->base_offset; 1982f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (deref_array->deref_array_type == nir_deref_array_type_direct) 1983f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 1984f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1985f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(deref_array->deref_array_type == nir_deref_array_type_indirect); 1986f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie index = get_src(ctx, deref_array->indirect); 1987f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride = LLVMConstInt(ctx->i32, size, 0); 1988f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie local_offset = LLVMBuildMul(ctx->builder, stride, index, ""); 1989f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1990f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (offset) 1991f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = LLVMBuildAdd(ctx->builder, offset, local_offset, ""); 1992f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 1993f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = local_offset; 1994f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (tail->deref_type == nir_deref_type_struct) { 1995f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_deref_struct *deref_struct = nir_deref_as_struct(tail); 1996f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 1997f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < deref_struct->index; i++) { 1998f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); 1999f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const_offset += glsl_count_attribute_slots(ft, vs_in); 2000f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2001f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 2002f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unreachable("unsupported deref type"); 2003f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2004f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2005f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2006f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (const_offset && offset) 2007f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset = LLVMBuildAdd(ctx->builder, offset, 2008f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, const_offset, 0), 2009f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ""); 2010f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2011f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *const_out = const_offset; 2012f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *indir_out = offset; 2013f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2014f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2015f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, 2016f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2017f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2018f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef values[4]; 2019f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx = instr->variables[0]->var->data.driver_location; 2020f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int ve = instr->dest.ssa.num_components; 2021f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef indir_index; 2022f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned const_index; 2023f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->variables[0]->var->data.mode) { 2024f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_shader_in: 2025f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, 2026f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->stage == MESA_SHADER_VERTEX, 2027f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2028f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < ve; chan++) { 2029f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (indir_index) { 2030f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned count = glsl_count_attribute_slots( 2031f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->variables[0]->var->type, 2032f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->stage == MESA_SHADER_VERTEX); 203338c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle LLVMValueRef tmp_vec = ac_build_gather_values_extended( 203438c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle &ctx->ac, ctx->inputs + idx + chan, count, 2035f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4, false); 2036f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2037f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[chan] = LLVMBuildExtractElement(ctx->builder, 2038f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp_vec, 2039f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie indir_index, ""); 2040f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 2041f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[chan] = ctx->inputs[idx + chan + const_index * 4]; 2042f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 204338c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); 2044f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2045f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_local: 2046f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, 2047f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2048f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < ve; chan++) { 2049f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (indir_index) { 2050f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned count = glsl_count_attribute_slots( 2051f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->variables[0]->var->type, false); 205238c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle LLVMValueRef tmp_vec = ac_build_gather_values_extended( 205338c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle &ctx->ac, ctx->locals + idx + chan, count, 2054f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4, true); 2055f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2056f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[chan] = LLVMBuildExtractElement(ctx->builder, 2057f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp_vec, 2058f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie indir_index, ""); 2059f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2060f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], ""); 2061f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2062f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 206338c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); 2064f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_shader_out: 2065f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, 2066f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2067f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < ve; chan++) { 2068f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (indir_index) { 2069f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned count = glsl_count_attribute_slots( 2070f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->variables[0]->var->type, false); 207138c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle LLVMValueRef tmp_vec = ac_build_gather_values_extended( 207238c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle &ctx->ac, ctx->outputs + idx + chan, count, 2073f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4, true); 2074f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2075f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[chan] = LLVMBuildExtractElement(ctx->builder, 2076f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp_vec, 2077f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie indir_index, ""); 2078f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2079f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[chan] = LLVMBuildLoad(ctx->builder, 2080f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[idx + chan + const_index * 4], 2081f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ""); 2082f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2083f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 208438c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); 2085f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_shared: { 2086f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, 2087f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2088f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); 2089f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef derived_ptr; 2090f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2091241e0b7068c0ae8e5e3480b9f6846a5749184626Dave Airlie if (indir_index) 2092241e0b7068c0ae8e5e3480b9f6846a5749184626Dave Airlie indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); 2093241e0b7068c0ae8e5e3480b9f6846a5749184626Dave Airlie 20944813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie for (unsigned chan = 0; chan < ve; chan++) { 20954813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); 20964813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie if (indir_index) 20974813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie index = LLVMBuildAdd(ctx->builder, index, indir_index, ""); 20984813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); 20994813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, ""); 21004813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie } 210138c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); 2102f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2103f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2104f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2105f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2106f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return NULL; 2107f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2108f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2109f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 2110f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlievisit_store_var(struct nir_to_llvm_context *ctx, 2111f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2112f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2113f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef temp_ptr, value; 2114f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx = instr->variables[0]->var->data.driver_location; 2115f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0])); 2116f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int writemask = instr->const_index[0]; 2117f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef indir_index; 2118f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned const_index; 2119f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->variables[0]->var->data.mode) { 2120f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_shader_out: 2121f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, 2122f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2123f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 2124f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int stride = 4; 2125f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!(writemask & (1 << chan))) 2126f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 2127f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (get_llvm_num_components(src) == 1) 2128f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = src; 2129f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 2130f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = LLVMBuildExtractElement(ctx->builder, src, 2131f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 2132f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie chan, false), 2133f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ""); 2134f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2135f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 || 2136f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0) 2137f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride = 1; 2138f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (indir_index) { 2139f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned count = glsl_count_attribute_slots( 2140f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->variables[0]->var->type, false); 214138c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle LLVMValueRef tmp_vec = ac_build_gather_values_extended( 214238c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle &ctx->ac, ctx->outputs + idx + chan, count, 2143f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stride, true); 2144f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2145f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (get_llvm_num_components(tmp_vec) > 1) { 2146f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, 2147f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value, indir_index, ""); 2148f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 2149f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp_vec = value; 2150f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie build_store_values_extended(ctx, ctx->outputs + idx + chan, 2151f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie count, stride, tmp_vec); 2152f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2153f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2154f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp_ptr = ctx->outputs[idx + chan + const_index * stride]; 2155f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2156f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildStore(ctx->builder, value, temp_ptr); 2157f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2158f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2159f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2160f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_local: 2161f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, 2162f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2163f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 2164f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!(writemask & (1 << chan))) 2165f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 2166f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2167f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (get_llvm_num_components(src) == 1) 2168f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = src; 2169f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 2170f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value = LLVMBuildExtractElement(ctx->builder, src, 2171f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, chan, false), ""); 2172f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (indir_index) { 2173f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned count = glsl_count_attribute_slots( 2174f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->variables[0]->var->type, false); 217538c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle LLVMValueRef tmp_vec = ac_build_gather_values_extended( 217638c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle &ctx->ac, ctx->locals + idx + chan, count, 2177f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4, true); 2178f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2179f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, 2180f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie value, indir_index, ""); 2181f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie build_store_values_extended(ctx, ctx->locals + idx + chan, 2182f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie count, 4, tmp_vec); 2183f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2184f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp_ptr = ctx->locals[idx + chan + const_index * 4]; 2185f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2186f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildStore(ctx->builder, value, temp_ptr); 2187f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2188f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2189f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2190f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_var_shared: { 2191f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ptr; 2192f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, 2193f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &const_index, &indir_index); 2194f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2195f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); 2196f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef derived_ptr; 2197f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2198241e0b7068c0ae8e5e3480b9f6846a5749184626Dave Airlie if (indir_index) 2199241e0b7068c0ae8e5e3480b9f6846a5749184626Dave Airlie indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); 2200241e0b7068c0ae8e5e3480b9f6846a5749184626Dave Airlie 22014813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 22024813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie if (!(writemask & (1 << chan))) 22034813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie continue; 22044813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie 22054813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); 22064813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie 22074813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie if (get_llvm_num_components(src) == 1) 22084813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie value = src; 22094813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie else 22104813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie value = LLVMBuildExtractElement(ctx->builder, src, 22114813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie LLVMConstInt(ctx->i32, 22124813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie chan, false), 22134813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie ""); 22144813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie 22154813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie if (indir_index) 22164813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie index = LLVMBuildAdd(ctx->builder, index, indir_index, ""); 22174813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie 22184813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); 22194813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie LLVMBuildStore(ctx->builder, 22204813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie to_integer(ctx, value), derived_ptr); 22214813c9ade70b4181ccf5d0ab462cf34da96373a6Dave Airlie } 2222f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2223f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2224f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2225f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2226f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2227f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2228f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2229f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) 2230f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2231f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (dim) { 2232f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_BUF: 2233f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 1; 2234f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_1D: 2235f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return array ? 2 : 1; 2236f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_2D: 2237f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return array ? 3 : 2; 2238ada66480b29be5730b217943e626b246ce6c67fdDave Airlie case GLSL_SAMPLER_DIM_MS: 2239ada66480b29be5730b217943e626b246ce6c67fdDave Airlie return array ? 4 : 3; 2240f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_3D: 2241f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_CUBE: 2242f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 3; 2243f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_RECT: 2244f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_SUBPASS: 2245f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 2; 2246ada66480b29be5730b217943e626b246ce6c67fdDave Airlie case GLSL_SAMPLER_DIM_SUBPASS_MS: 2247ada66480b29be5730b217943e626b246ce6c67fdDave Airlie return 3; 2248f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2249f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2250f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2251f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return 0; 2252f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2253f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2254f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, 22557083ca262581540265765bb0af7a11616fe1c6a3Dave Airlie nir_intrinsic_instr *instr) 2256f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2257f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *type = instr->variables[0]->var->type; 2258f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if(instr->variables[0]->deref.child) 2259f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = instr->variables[0]->deref.child->type; 2260f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2261f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0 = get_src(ctx, instr->src[0]); 2262f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef coords[4]; 2263f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef masks[] = { 2264f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), 2265f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false), 2266f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 2267f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res; 2268f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int count; 2269ada66480b29be5730b217943e626b246ce6c67fdDave Airlie enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); 22707083ca262581540265765bb0af7a11616fe1c6a3Dave Airlie bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS || 22717083ca262581540265765bb0af7a11616fe1c6a3Dave Airlie dim == GLSL_SAMPLER_DIM_SUBPASS_MS); 2272ada66480b29be5730b217943e626b246ce6c67fdDave Airlie bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || 2273ada66480b29be5730b217943e626b246ce6c67fdDave Airlie dim == GLSL_SAMPLER_DIM_SUBPASS_MS); 2274ada66480b29be5730b217943e626b246ce6c67fdDave Airlie 2275ada66480b29be5730b217943e626b246ce6c67fdDave Airlie count = image_type_to_components_count(dim, 2276f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie glsl_sampler_type_is_array(type)); 2277f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2278f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == 1) { 2279f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->src[0].ssa->num_components) 2280f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); 2281f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 2282f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = src0; 2283f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2284f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int chan; 2285ada66480b29be5730b217943e626b246ce6c67fdDave Airlie if (is_ms) 2286ada66480b29be5730b217943e626b246ce6c67fdDave Airlie count--; 2287f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < count; ++chan) { 2288f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], ""); 2289f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2290f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2291f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (add_frag_pos) { 2292f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < count; ++chan) 2293f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); 2294f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2295ada66480b29be5730b217943e626b246ce6c67fdDave Airlie if (is_ms) { 2296ada66480b29be5730b217943e626b246ce6c67fdDave Airlie coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0); 2297ada66480b29be5730b217943e626b246ce6c67fdDave Airlie count++; 2298ada66480b29be5730b217943e626b246ce6c67fdDave Airlie } 2299ada66480b29be5730b217943e626b246ce6c67fdDave Airlie 2300f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count == 3) { 2301f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords[3] = LLVMGetUndef(ctx->i32); 2302f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie count = 4; 2303f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 230438c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle res = ac_build_gather_values(&ctx->ac, coords, count); 2305f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2306f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return res; 2307f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2308f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 23095c66d46d6ace0603227e4b919b132416444949e7Tom Stellardstatic void build_type_name_for_intr( 23105c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeRef type, 23115c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char *buf, unsigned bufsize) 23125c66d46d6ace0603227e4b919b132416444949e7Tom Stellard{ 23135c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeRef elem_type = type; 23145c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 23155c66d46d6ace0603227e4b919b132416444949e7Tom Stellard assert(bufsize >= 8); 23165c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 23175c66d46d6ace0603227e4b919b132416444949e7Tom Stellard if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 23185c66d46d6ace0603227e4b919b132416444949e7Tom Stellard int ret = snprintf(buf, bufsize, "v%u", 23195c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMGetVectorSize(type)); 23205c66d46d6ace0603227e4b919b132416444949e7Tom Stellard if (ret < 0) { 23215c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char *type_name = LLVMPrintTypeToString(type); 23225c66d46d6ace0603227e4b919b132416444949e7Tom Stellard fprintf(stderr, "Error building type name for: %s\n", 23235c66d46d6ace0603227e4b919b132416444949e7Tom Stellard type_name); 23245c66d46d6ace0603227e4b919b132416444949e7Tom Stellard return; 23255c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } 23265c66d46d6ace0603227e4b919b132416444949e7Tom Stellard elem_type = LLVMGetElementType(type); 23275c66d46d6ace0603227e4b919b132416444949e7Tom Stellard buf += ret; 23285c66d46d6ace0603227e4b919b132416444949e7Tom Stellard bufsize -= ret; 23295c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } 23305c66d46d6ace0603227e4b919b132416444949e7Tom Stellard switch (LLVMGetTypeKind(elem_type)) { 23315c66d46d6ace0603227e4b919b132416444949e7Tom Stellard default: break; 23325c66d46d6ace0603227e4b919b132416444949e7Tom Stellard case LLVMIntegerTypeKind: 23335c66d46d6ace0603227e4b919b132416444949e7Tom Stellard snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); 23345c66d46d6ace0603227e4b919b132416444949e7Tom Stellard break; 23355c66d46d6ace0603227e4b919b132416444949e7Tom Stellard case LLVMFloatTypeKind: 23365c66d46d6ace0603227e4b919b132416444949e7Tom Stellard snprintf(buf, bufsize, "f32"); 23375c66d46d6ace0603227e4b919b132416444949e7Tom Stellard break; 23385c66d46d6ace0603227e4b919b132416444949e7Tom Stellard case LLVMDoubleTypeKind: 23395c66d46d6ace0603227e4b919b132416444949e7Tom Stellard snprintf(buf, bufsize, "f64"); 23405c66d46d6ace0603227e4b919b132416444949e7Tom Stellard break; 23415c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } 23425c66d46d6ace0603227e4b919b132416444949e7Tom Stellard} 23435c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 23445c66d46d6ace0603227e4b919b132416444949e7Tom Stellardstatic void get_image_intr_name(const char *base_name, 23455c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeRef data_type, 23465c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeRef coords_type, 23475c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeRef rsrc_type, 23485c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char *out_name, unsigned out_len) 23495c66d46d6ace0603227e4b919b132416444949e7Tom Stellard{ 23505c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char coords_type_name[8]; 23515c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 23525c66d46d6ace0603227e4b919b132416444949e7Tom Stellard build_type_name_for_intr(coords_type, coords_type_name, 23535c66d46d6ace0603227e4b919b132416444949e7Tom Stellard sizeof(coords_type_name)); 23545c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 23555c66d46d6ace0603227e4b919b132416444949e7Tom Stellard if (HAVE_LLVM <= 0x0309) { 23565c66d46d6ace0603227e4b919b132416444949e7Tom Stellard snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name); 23575c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } else { 23585c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char data_type_name[8]; 23595c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char rsrc_type_name[8]; 23605c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 23615c66d46d6ace0603227e4b919b132416444949e7Tom Stellard build_type_name_for_intr(data_type, data_type_name, 23625c66d46d6ace0603227e4b919b132416444949e7Tom Stellard sizeof(data_type_name)); 23635c66d46d6ace0603227e4b919b132416444949e7Tom Stellard build_type_name_for_intr(rsrc_type, rsrc_type_name, 23645c66d46d6ace0603227e4b919b132416444949e7Tom Stellard sizeof(rsrc_type_name)); 23655c66d46d6ace0603227e4b919b132416444949e7Tom Stellard snprintf(out_name, out_len, "%s.%s.%s.%s", base_name, 23665c66d46d6ace0603227e4b919b132416444949e7Tom Stellard data_type_name, coords_type_name, rsrc_type_name); 23675c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } 23685c66d46d6ace0603227e4b919b132416444949e7Tom Stellard} 23695c66d46d6ace0603227e4b919b132416444949e7Tom Stellard 2370f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, 2371f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2372f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2373f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[7]; 2374f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res; 23755c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char intrinsic_name[64]; 2376f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const nir_variable *var = instr->variables[0]->var; 2377f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *type = var->type; 2378f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if(instr->variables[0]->deref.child) 2379f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = instr->variables[0]->deref.child->type; 2380f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2381f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = glsl_without_array(type); 2382f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { 2383f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); 2384f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), 2385f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ 2386f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ 2387f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */ 2388f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */ 23892c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32, 2390f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params, 5, 0); 2391f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2392f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = trim_vector(ctx, res, instr->dest.ssa.num_components); 2393f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = to_integer(ctx, res); 2394f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 23955c66d46d6ace0603227e4b919b132416444949e7Tom Stellard bool is_da = glsl_sampler_type_is_array(type) || 23965c66d46d6ace0603227e4b919b132416444949e7Tom Stellard glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; 23975c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero; 23985c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false); 23995c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false); 2400f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24017083ca262581540265765bb0af7a11616fe1c6a3Dave Airlie params[0] = get_image_coords(ctx, instr); 2402f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); 2403f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ 24045c66d46d6ace0603227e4b919b132416444949e7Tom Stellard if (HAVE_LLVM <= 0x0309) { 24055c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */ 24065c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[4] = da; 24075c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[5] = glc; 24085c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[6] = slc; 24095c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } else { 24105c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false); 24115c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[3] = glc; 24125c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[4] = slc; 24135c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[5] = lwe; 24145c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[6] = da; 24155c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } 2416f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24175c66d46d6ace0603227e4b919b132416444949e7Tom Stellard get_image_intr_name("llvm.amdgcn.image.load", 24185c66d46d6ace0603227e4b919b132416444949e7Tom Stellard ctx->v4f32, /* vdata */ 24195c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeOf(params[0]), /* coords */ 24205c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeOf(params[1]), /* rsrc */ 24215c66d46d6ace0603227e4b919b132416444949e7Tom Stellard intrinsic_name, sizeof(intrinsic_name)); 2422f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24232c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32, 24242fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie params, 7, AC_FUNC_ATTR_READONLY); 2425f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2426f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return to_integer(ctx, res); 2427f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2428f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2429f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_image_store(struct nir_to_llvm_context *ctx, 2430f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2431f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2432f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[8]; 24335c66d46d6ace0603227e4b919b132416444949e7Tom Stellard char intrinsic_name[64]; 2434f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const nir_variable *var = instr->variables[0]->var; 2435f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); 2436f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); 2437f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *type = glsl_without_array(var->type); 2438f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2439f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage == MESA_SHADER_FRAGMENT) 2440f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.writes_memory = true; 2441f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2442f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { 2443f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */ 2444f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); 2445f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), 2446f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ 2447f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ 2448f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[4] = i1false; /* glc */ 2449f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[5] = i1false; /* slc */ 24502c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt, 2451f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params, 6, 0); 2452f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 24535c66d46d6ace0603227e4b919b132416444949e7Tom Stellard bool is_da = glsl_sampler_type_is_array(type) || 24545c66d46d6ace0603227e4b919b132416444949e7Tom Stellard glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; 24555c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef da = is_da ? i1true : i1false; 24565c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef glc = i1false; 24575c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef slc = i1false; 2458f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24595c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[0] = to_float(ctx, get_src(ctx, instr->src[2])); 24607083ca262581540265765bb0af7a11616fe1c6a3Dave Airlie params[1] = get_image_coords(ctx, instr); /* coords */ 2461f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); 2462f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ 24635c66d46d6ace0603227e4b919b132416444949e7Tom Stellard if (HAVE_LLVM <= 0x0309) { 24645c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[4] = i1false; /* r128 */ 24655c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[5] = da; 24665c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[6] = glc; 24675c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[7] = slc; 24685c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } else { 24695c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMValueRef lwe = i1false; 24705c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[4] = glc; 24715c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[5] = slc; 24725c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[6] = lwe; 24735c66d46d6ace0603227e4b919b132416444949e7Tom Stellard params[7] = da; 24745c66d46d6ace0603227e4b919b132416444949e7Tom Stellard } 2475f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24765c66d46d6ace0603227e4b919b132416444949e7Tom Stellard get_image_intr_name("llvm.amdgcn.image.store", 24775c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeOf(params[0]), /* vdata */ 24785c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeOf(params[1]), /* coords */ 24795c66d46d6ace0603227e4b919b132416444949e7Tom Stellard LLVMTypeOf(params[2]), /* rsrc */ 24805c66d46d6ace0603227e4b919b132416444949e7Tom Stellard intrinsic_name, sizeof(intrinsic_name)); 2481f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 24822c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, 2483f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params, 8, 0); 2484f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2485f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2486f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2487f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2488f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, 2489f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2490f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2491f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[6]; 2492f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int param_count = 0; 2493f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const nir_variable *var = instr->variables[0]->var; 2494f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); 2495f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); 2496f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *base_name = "llvm.amdgcn.image.atomic"; 2497f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *atomic_name; 2498f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef coords; 2499f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie char intrinsic_name[32], coords_type[8]; 2500f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *type = glsl_without_array(var->type); 2501f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2502f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage == MESA_SHADER_FRAGMENT) 2503f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.writes_memory = true; 2504f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2505f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = get_src(ctx, instr->src[2]); 2506f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) 2507f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = get_src(ctx, instr->src[3]); 2508f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2509f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { 2510f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); 2511f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), 2512f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ 2513f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = ctx->i32zero; /* voffset */ 2514f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = i1false; /* glc */ 2515f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = i1false; /* slc */ 2516f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2517f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool da = glsl_sampler_type_is_array(type) || 2518f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; 2519f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 25207083ca262581540265765bb0af7a11616fe1c6a3Dave Airlie coords = params[param_count++] = get_image_coords(ctx, instr); 2521f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); 2522f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = i1false; /* r128 */ 2523f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = da ? i1true : i1false; /* da */ 2524f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[param_count++] = i1false; /* slc */ 2525f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2526f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2527f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->intrinsic) { 2528f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_add: 2529f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "add"; 2530f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2531f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_min: 2532f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "smin"; 2533f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2534f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_max: 2535f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "smax"; 2536f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2537f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_and: 2538f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "and"; 2539f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2540f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_or: 2541f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "or"; 2542f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2543f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_xor: 2544f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "xor"; 2545f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2546f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_exchange: 2547f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "swap"; 2548f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2549f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_comp_swap: 2550f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie atomic_name = "cmpswap"; 2551f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2552f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2553f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 2554f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2555f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie build_int_type_name(LLVMTypeOf(coords), 2556f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords_type, sizeof(coords_type)); 2557f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2558f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie snprintf(intrinsic_name, sizeof(intrinsic_name), 2559f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "%s.%s.%s", base_name, atomic_name, coords_type); 25602c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0); 2561f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2562f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2563f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, 2564f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2565f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2566f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res; 2567f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef params[10]; 2568f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const nir_variable *var = instr->variables[0]->var; 2569f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct glsl_type *type = instr->variables[0]->var->type; 2570f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool da = glsl_sampler_type_is_array(var->type) || 2571f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE; 2572f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if(instr->variables[0]->deref.child) 2573f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = instr->variables[0]->deref.child->type; 2574f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2575f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) 2576f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true); 2577f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[0] = ctx->i32zero; 2578f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); 2579f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[2] = LLVMConstInt(ctx->i32, 15, false); 2580f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[3] = ctx->i32zero; 2581f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[4] = ctx->i32zero; 2582f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[5] = da ? ctx->i32one : ctx->i32zero; 2583f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[6] = ctx->i32zero; 2584f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[7] = ctx->i32zero; 2585f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[8] = ctx->i32zero; 2586f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie params[9] = ctx->i32zero; 2587f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 25882c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, 25892fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie params, 10, AC_FUNC_ATTR_READNONE); 2590f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2591f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && 2592f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie glsl_sampler_type_is_array(type)) { 2593f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); 2594f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); 2595f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, ""); 2596f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie z = LLVMBuildSDiv(ctx->builder, z, six, ""); 2597f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = LLVMBuildInsertElement(ctx->builder, res, z, two, ""); 2598f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2599f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return res; 2600f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2601f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2602f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void emit_waitcnt(struct nir_to_llvm_context *ctx) 2603f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2604f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[1] = { 2605f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0xf70, false), 2606f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 26072c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt", 2608f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->voidt, args, 1, 0); 2609f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2610f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2611f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void emit_barrier(struct nir_to_llvm_context *ctx) 2612f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2613f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie // TODO tess 26142c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier", 2615f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->voidt, NULL, 0, 0); 2616f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2617f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2618dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airliestatic void emit_discard_if(struct nir_to_llvm_context *ctx, 2619dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie nir_intrinsic_instr *instr) 2620dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie{ 2621dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie LLVMValueRef cond; 2622dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie ctx->shader_info->fs.can_discard = true; 2623dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie 2624dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, 2625dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie get_src(ctx, instr->src[0]), 2626dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie ctx->i32zero, ""); 2627dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie 2628dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie cond = LLVMBuildSelect(ctx->builder, cond, 2629dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie LLVMConstReal(ctx->f32, -1.0f), 2630dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie ctx->f32zero, ""); 26312c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", 2632d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie ctx->voidt, 2633d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie &cond, 1, 0); 2634dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie} 2635dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie 2636f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef 2637f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlievisit_load_local_invocation_index(struct nir_to_llvm_context *ctx) 2638f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2639f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result; 2640f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef thread_id = get_thread_id(ctx); 2641f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildAnd(ctx->builder, ctx->tg_size, 2642f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0xfc0, false), ""); 2643f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2644f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildAdd(ctx->builder, result, thread_id, ""); 2645f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2646f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2647f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, 2648f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2649f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2650f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ptr, result; 2651f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx = instr->variables[0]->var->data.driver_location; 2652f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src = get_src(ctx, instr->src[0]); 2653f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); 2654f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2655f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) { 2656f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src1 = get_src(ctx, instr->src[1]); 2657f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildAtomicCmpXchg(ctx->builder, 2658f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ptr, src, src1, 2659f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAtomicOrderingSequentiallyConsistent, 2660f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAtomicOrderingSequentiallyConsistent, 2661f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie false); 2662f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 2663f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAtomicRMWBinOp op; 2664f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->intrinsic) { 2665f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_add: 2666f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpAdd; 2667f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2668f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_umin: 2669f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpUMin; 2670f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2671f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_umax: 2672f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpUMax; 2673f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2674f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_imin: 2675f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpMin; 2676f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2677f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_imax: 2678f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpMax; 2679f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2680f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_and: 2681f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpAnd; 2682f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2683f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_or: 2684f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpOr; 2685f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2686f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_xor: 2687f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpXor; 2688f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2689f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_exchange: 2690f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op = LLVMAtomicRMWBinOpXchg; 2691f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2692f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2693f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return NULL; 2694f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2695f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2696f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src), 2697f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAtomicOrderingSequentiallyConsistent, 2698f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie false); 2699f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2700f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return result; 2701f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2702f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2703f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define INTERP_CENTER 0 2704f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define INTERP_CENTROID 1 2705f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie#define INTERP_SAMPLE 2 2706f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2707f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx, 2708f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie enum glsl_interp_mode interp, unsigned location) 2709f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2710f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (interp) { 2711f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case INTERP_MODE_FLAT: 2712f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2713f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return NULL; 2714f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case INTERP_MODE_SMOOTH: 2715f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case INTERP_MODE_NONE: 2716f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (location == INTERP_CENTER) 2717f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx->persp_center; 2718f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (location == INTERP_CENTROID) 2719f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx->persp_centroid; 2720f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (location == INTERP_SAMPLE) 2721f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx->persp_sample; 2722f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2723f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case INTERP_MODE_NOPERSPECTIVE: 2724f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (location == INTERP_CENTER) 2725f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx->linear_center; 2726f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (location == INTERP_CENTROID) 2727f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx->linear_centroid; 2728f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (location == INTERP_SAMPLE) 2729f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx->linear_sample; 2730f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2731f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2732f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return NULL; 2733f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2734f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2735f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx, 2736f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef sample_id) 2737f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2738f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */ 2739f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), ""); 2740f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), ""); 2741f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result[2]; 2742f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2743f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0); 2744f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1); 2745f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 274638c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return ac_build_gather_values(&ctx->ac, result, 2); 2747f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2748f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 27493c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airliestatic LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx) 27503c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie{ 27513c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie LLVMValueRef values[2]; 27523c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie 27533c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie values[0] = emit_ffract(ctx, ctx->frag_pos[0]); 27543c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie values[1] = emit_ffract(ctx, ctx->frag_pos[1]); 275538c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return ac_build_gather_values(&ctx->ac, values, 2); 27563c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie} 27573c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie 2758f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, 2759f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2760f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2761f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result[2]; 2762f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef interp_param, attr_number; 2763f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned location; 2764f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned chan; 2765f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src_c0, src_c1; 2766f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intr_name; 2767f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0; 2768f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0; 2769f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->intrinsic) { 2770f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_interp_var_at_centroid: 2771f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie location = INTERP_CENTROID; 2772f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2773f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_interp_var_at_sample: 2774f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_interp_var_at_offset: 2775f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie location = INTERP_SAMPLE; 2776f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src0 = get_src(ctx, instr->src[0]); 2777f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2778f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2779f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2780f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2781f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2782f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) { 2783f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "")); 2784f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "")); 2785f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) { 2786f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef sample_position; 2787f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f); 2788f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2789f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* fetch sample ID */ 2790f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie sample_position = load_sample_position(ctx, src0); 2791f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2792f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, ""); 2793f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, ""); 2794f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, ""); 2795f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, ""); 2796f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2797f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location); 2798f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie attr_number = LLVMConstInt(ctx->i32, input_index, false); 2799f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2800f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (location == INTERP_SAMPLE) { 2801f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ij_out[2]; 2802f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param); 2803f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2804f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* 2805f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * take the I then J parameters, and the DDX/Y for it, and 2806f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * calculate the IJ inputs for the interpolator. 2807f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * temp1 = ddx * offset/sample.x + I; 2808f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * interp_param.I = ddy * offset/sample.y + temp1; 2809f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * temp1 = ddx * offset/sample.x + J; 2810f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * interp_param.J = ddy * offset/sample.y + temp1; 2811f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 2812f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < 2; i++) { 2813f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false); 2814f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false); 2815f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder, 2816f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ddxy_out, ix_ll, ""); 2817f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder, 2818f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ddxy_out, iy_ll, ""); 2819f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder, 2820f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie interp_param, ix_ll, ""); 2821f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef temp1, temp2; 2822f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2823f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie interp_el = LLVMBuildBitCast(ctx->builder, interp_el, 2824f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32, ""); 2825f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2826f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, ""); 2827f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, ""); 2828f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2829f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, ""); 2830f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, ""); 2831f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2832f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ij_out[i] = LLVMBuildBitCast(ctx->builder, 2833f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie temp2, ctx->i32, ""); 2834f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 283538c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); 2836f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2837f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2838f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; 2839f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < 2; chan++) { 2840f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[4]; 2841f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); 2842f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2843f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = llvm_chan; 2844f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = attr_number; 2845f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = ctx->prim_mask; 2846f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[3] = interp_param; 28472c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name, 2848f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32, args, args[3] ? 4 : 3, 28492fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 2850f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 285138c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle return ac_build_gather_values(&ctx->ac, result, 2); 2852f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2853f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2854f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_intrinsic(struct nir_to_llvm_context *ctx, 2855f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_intrinsic_instr *instr) 2856f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 2857f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result = NULL; 2858f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2859f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->intrinsic) { 2860f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_work_group_id: { 2861f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->workgroup_ids; 2862f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2863f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2864f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_base_vertex: { 2865f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->base_vertex; 2866f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2867f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2868f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_vertex_id_zero_base: { 2869f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->vertex_id; 2870f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2871f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2872f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_local_invocation_id: { 2873f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->local_invocation_ids; 2874f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2875f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2876f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_base_instance: 2877f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->start_instance; 2878f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2879f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_sample_id: 2880f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie ctx->shader_info->fs.force_persample = true; 2881220912e21432dba996cb2694dea480c65cf6a7c6Dave Airlie result = unpack_param(ctx, ctx->ancillary, 8, 4); 2882f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 28833c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie case nir_intrinsic_load_sample_pos: 2884f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie ctx->shader_info->fs.force_persample = true; 28853c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie result = load_sample_pos(ctx); 28863c6151ccafeb47febb8320b7f4f7bbc0e819708eDave Airlie break; 2887f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_front_face: 2888f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->front_face; 2889f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2890f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_instance_id: 2891f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->instance_id; 2892f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, 2893f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.vgpr_comp_cnt); 2894f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2895f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_num_work_groups: 2896f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = ctx->num_work_groups; 2897f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2898f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_local_invocation_index: 2899f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_load_local_invocation_index(ctx); 2900f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2901f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_push_constant: 2902f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_load_push_constant(ctx, instr); 2903f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2904f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_vulkan_resource_index: 2905f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_vulkan_resource_index(ctx, instr); 2906f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2907f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_store_ssbo: 2908f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_store_ssbo(ctx, instr); 2909f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2910f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_ssbo: 2911f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_load_buffer(ctx, instr); 2912f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2913f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_add: 2914f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_imin: 2915f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_umin: 2916f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_imax: 2917f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_umax: 2918f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_and: 2919f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_or: 2920f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_xor: 2921f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_exchange: 2922f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_ssbo_atomic_comp_swap: 2923f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_atomic_ssbo(ctx, instr); 2924f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2925f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_ubo: 292605533ce418851b12fd0a1e940a633f9280203aabBas Nieuwenhuizen result = visit_load_ubo_buffer(ctx, instr); 2927f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2928f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_get_buffer_size: 2929f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_get_buffer_size(ctx, instr); 2930f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2931f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_load_var: 2932f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_load_var(ctx, instr); 2933f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2934f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_store_var: 2935f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_store_var(ctx, instr); 2936f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2937f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_load: 2938f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_image_load(ctx, instr); 2939f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2940f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_store: 2941f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_image_store(ctx, instr); 2942f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2943f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_add: 2944f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_min: 2945f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_max: 2946f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_and: 2947f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_or: 2948f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_xor: 2949f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_exchange: 2950f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_atomic_comp_swap: 2951f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_image_atomic(ctx, instr); 2952f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2953f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_image_size: 2954f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_image_size(ctx, instr); 2955f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2956f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_discard: 2957f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.can_discard = true; 29582c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", 2959d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie ctx->voidt, 2960d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie NULL, 0, 0); 2961f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2962dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie case nir_intrinsic_discard_if: 2963dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie emit_discard_if(ctx, instr); 2964dd77faeca2819fc78a72f71ca9b996c209378cdeDave Airlie break; 2965f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_memory_barrier: 2966f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie emit_waitcnt(ctx); 2967f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2968f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_barrier: 2969f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie emit_barrier(ctx); 2970f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2971f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_add: 2972f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_imin: 2973f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_umin: 2974f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_imax: 2975f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_umax: 2976f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_and: 2977f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_or: 2978f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_xor: 2979f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_exchange: 2980f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_var_atomic_comp_swap: 2981f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_var_atomic(ctx, instr); 2982f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2983f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_interp_var_at_centroid: 2984f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_interp_var_at_sample: 2985f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_intrinsic_interp_var_at_offset: 2986f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = visit_interp(ctx, instr); 2987f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2988f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 2989f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "Unknown intrinsic: "); 2990f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_print_instr(&instr->instr, stderr); 2991f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "\n"); 2992f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 2993f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2994f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (result) { 2995f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); 2996f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 2997f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 2998f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 2999f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, 3000f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_deref_var *deref, 3001f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie enum desc_type desc_type) 3002f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3003f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned desc_set = deref->var->data.descriptor_set; 3004f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef list = ctx->descriptor_sets[desc_set]; 3005f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout; 3006f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding; 3007f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned offset = binding->offset; 3008f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned stride = binding->size; 3009f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned type_size; 3010f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuilderRef builder = ctx->builder; 3011f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type; 3012f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef index = NULL; 3013f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3014f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(deref->var->data.binding < layout->binding_count); 3015f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3016f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (desc_type) { 3017f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case DESC_IMAGE: 3018f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = ctx->v8i32; 3019f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type_size = 32; 3020f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3021f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case DESC_FMASK: 3022f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = ctx->v8i32; 3023f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset += 32; 3024f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type_size = 32; 3025f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3026f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case DESC_SAMPLER: 3027f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = ctx->v4i32; 3028f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) 3029f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset += 64; 3030f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3031f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type_size = 16; 3032f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3033f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case DESC_BUFFER: 3034f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type = ctx->v4i32; 3035f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie type_size = 16; 3036f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 303790c29784c66577f33ca5c8139e33ad027a212125Grazvydas Ignotas default: 303890c29784c66577f33ca5c8139e33ad027a212125Grazvydas Ignotas unreachable("invalid desc_type\n"); 3039f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3040f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3041f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (deref->deref.child) { 3042f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_deref_array *child = (nir_deref_array*)deref->deref.child; 3043f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3044f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(child->deref_array_type != nir_deref_array_type_wildcard); 3045f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset += child->base_offset * stride; 3046f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (child->deref_array_type == nir_deref_array_type_indirect) { 3047f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie index = get_src(ctx, child->indirect); 3048f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3049f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3050f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3051f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(stride % type_size == 0); 3052f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3053f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!index) 3054f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie index = ctx->i32zero; 3055f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3056f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), ""); 3057e54af02567c8482c3aae983bdb6b58abe41c0eb0Dave Airlie 3058e54af02567c8482c3aae983bdb6b58abe41c0eb0Dave Airlie list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0)); 3059f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie list = LLVMBuildPointerCast(builder, list, const_array(type, 0), ""); 3060f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3061f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return build_indexed_load_const(ctx, list, index); 3062f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3063f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3064f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void set_tex_fetch_args(struct nir_to_llvm_context *ctx, 3065f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_tex_info *tinfo, 3066f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_tex_instr *instr, 3067f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_texop op, 3068f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res_ptr, LLVMValueRef samp_ptr, 3069f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *param, unsigned count, 3070f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned dmask) 3071f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3072f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int num_args; 3073f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned is_rect = 0; 3074f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; 3075f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3076f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (op == nir_texop_lod) 3077f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie da = false; 3078f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Pad to power of two vector */ 3079f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie while (count < util_next_power_of_two(count)) 3080f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie param[count++] = LLVMGetUndef(ctx->i32); 3081f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3082f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (count > 1) 308338c67f77ed35423bdba5ee1ffd5ab6e5b1765f90Nicolai Hähnle tinfo->args[0] = ac_build_gather_values(&ctx->ac, param, count); 3084f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 3085f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[0] = param[0]; 3086f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3087f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[1] = res_ptr; 3088f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_args = 2; 3089f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3090f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (op == nir_texop_txf || 3091f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op == nir_texop_txf_ms || 3092f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op == nir_texop_query_levels || 3093f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op == nir_texop_texture_samples || 3094f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie op == nir_texop_txs) 3095f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->dst_type = ctx->v4i32; 3096f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else { 3097f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->dst_type = ctx->v4f32; 3098f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = samp_ptr; 3099f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3100f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3101f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { 3102f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[0] = res_ptr; 3103f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false); 3104f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[2] = param[0]; 3105f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->arg_count = 3; 3106f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return; 3107f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3108f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3109f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0); 3110f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */ 3111f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */ 3112f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0); 3113f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */ 3114f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */ 3115f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ 3116f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ 3117f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3118f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo->arg_count = num_args; 3119f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3120f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 31215cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. 31225cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * 31235cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * SI-CI: 31245cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic 31255cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * filtering manually. The driver sets img7 to a mask clearing 31265cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: 31275cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * s_and_b32 samp0, samp0, img7 31285cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * 31295cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * VI: 31305cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund * The ANISO_OVERRIDE sampler field enables this fix in TA. 31315cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund */ 31325cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglundstatic LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx, 31335cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund LLVMValueRef res, LLVMValueRef samp) 31345cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund{ 31355cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund LLVMBuilderRef builder = ctx->builder; 31365cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund LLVMValueRef img7, samp0; 31375cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund 31385cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund if (ctx->options->chip_class >= VI) 31395cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund return samp; 31405cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund 31415cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund img7 = LLVMBuildExtractElement(builder, res, 31425cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund LLVMConstInt(ctx->i32, 7, 0), ""); 31435cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund samp0 = LLVMBuildExtractElement(builder, samp, 31445cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund LLVMConstInt(ctx->i32, 0, 0), ""); 31455cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund samp0 = LLVMBuildAnd(builder, samp0, img7, ""); 31465cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund return LLVMBuildInsertElement(builder, samp, samp0, 31475cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund LLVMConstInt(ctx->i32, 0, 0), ""); 31485cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund} 31495cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund 3150f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void tex_fetch_ptrs(struct nir_to_llvm_context *ctx, 3151f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_tex_instr *instr, 3152f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, 3153f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *fmask_ptr) 3154f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3155f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) 3156f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER); 3157f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 3158f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE); 3159f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (samp_ptr) { 3160f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->sampler) 3161f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER); 3162f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 3163f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER); 31645cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) 31655cbcbc75f437b805e5c3c0c3b5c1192c0d68c1f5Fredrik Höglund *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); 3166f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 316763406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || 316863406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie instr->op == nir_texop_samples_identical)) 3169f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK); 3170f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3171f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 31729ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airliestatic LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx, 31739ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie LLVMValueRef coord) 31749ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie{ 31759ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie coord = to_float(ctx, coord); 31769ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0); 31779ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie coord = to_integer(ctx, coord); 31789ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie return coord; 31799ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie} 31809ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie 3181f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) 3182f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3183f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result = NULL; 3184f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_tex_info tinfo = { 0 }; 3185f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned dmask = 0xf; 3186f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef address[16]; 3187f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef coords[5]; 3188fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; 3189b8c9ce4459af5857c3f68f4bd6a3e33357e22337Bas Nieuwenhuizen LLVMValueRef bias = NULL, offsets = NULL; 3190f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; 3191f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ddx = NULL, ddy = NULL; 3192f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef derivs[6]; 3193f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned chan, count = 0; 3194f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned const_src = 0, num_deriv_comp = 0; 3195f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3196f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); 3197f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3198f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < instr->num_srcs; i++) { 3199f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->src[i].src_type) { 3200f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_coord: 3201f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coord = get_src(ctx, instr->src[i].src); 3202f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3203f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_projector: 3204f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3205fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin case nir_tex_src_comparator: 3206fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin comparator = get_src(ctx, instr->src[i].src); 3207f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3208f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_offset: 3209f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offsets = get_src(ctx, instr->src[i].src); 3210f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const_src = i; 3211f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3212f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_bias: 3213f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bias = get_src(ctx, instr->src[i].src); 3214f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3215f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_lod: 3216f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie lod = get_src(ctx, instr->src[i].src); 3217f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3218f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_ms_index: 3219f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie sample_index = get_src(ctx, instr->src[i].src); 3220f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3221f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_ms_mcs: 3222f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3223f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_ddx: 3224f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ddx = get_src(ctx, instr->src[i].src); 3225f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_deriv_comp = instr->src[i].src.ssa->num_components; 3226f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3227f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_ddy: 3228f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ddy = get_src(ctx, instr->src[i].src); 3229f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3230f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_texture_offset: 3231f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_sampler_offset: 3232f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_tex_src_plane: 3233f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 3234f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3235f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3236f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3237f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3238eaf311d90d20c819e5fa8a9cb7dd781bdb257041Dave Airlie if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { 323921e3f2f24c45e76d66c520173ced26158ef079fcDave Airlie result = get_buffer_size(ctx, res_ptr, true); 3240eaf311d90d20c819e5fa8a9cb7dd781bdb257041Dave Airlie goto write_result; 3241eaf311d90d20c819e5fa8a9cb7dd781bdb257041Dave Airlie } 3242eaf311d90d20c819e5fa8a9cb7dd781bdb257041Dave Airlie 3243f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->op == nir_texop_texture_samples) { 32442de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie LLVMValueRef res, samples, is_msaa; 3245f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, ""); 3246f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie samples = LLVMBuildExtractElement(ctx->builder, res, 3247f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 3, false), ""); 32482de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie is_msaa = LLVMBuildLShr(ctx->builder, samples, 32492de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie LLVMConstInt(ctx->i32, 28, false), ""); 32502de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie is_msaa = LLVMBuildAnd(ctx->builder, is_msaa, 32512de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie LLVMConstInt(ctx->i32, 0xe, false), ""); 32522de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa, 32532de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie LLVMConstInt(ctx->i32, 0xe, false), ""); 32542de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie 3255f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie samples = LLVMBuildLShr(ctx->builder, samples, 3256f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 16, false), ""); 3257f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie samples = LLVMBuildAnd(ctx->builder, samples, 3258f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0xf, false), ""); 3259f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie samples = LLVMBuildShl(ctx->builder, ctx->i32one, 3260f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie samples, ""); 32612de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie samples = LLVMBuildSelect(ctx->builder, is_msaa, samples, 32622de85eb97ab2ef45ec23f694a566cd0ec8192885Dave Airlie ctx->i32one, ""); 3263f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = samples; 3264f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie goto write_result; 3265f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3266f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3267f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (coord) 3268f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < instr->coord_components; chan++) 3269f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords[chan] = llvm_extract_elem(ctx, coord, chan); 3270f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3271f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (offsets && instr->op != nir_texop_txf) { 3272f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef offset[3], pack; 3273f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < 3; ++chan) 3274f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset[chan] = ctx->i32zero; 3275f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3276f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie tinfo.has_offset = true; 3277f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < get_llvm_num_components(offsets); chan++) { 3278f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset[chan] = llvm_extract_elem(ctx, offsets, chan); 3279f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan], 3280f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0x3f, false), ""); 3281f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (chan) 3282f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie offset[chan] = LLVMBuildShl(ctx->builder, offset[chan], 3283f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, chan * 8, false), ""); 3284f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3285f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], ""); 3286f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pack = LLVMBuildOr(ctx->builder, pack, offset[2], ""); 3287f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = pack; 3288f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3289f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3290f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* pack LOD bias value */ 3291f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->op == nir_texop_txb && bias) { 3292f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = bias; 3293f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3294f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3295f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Pack depth comparison value */ 3296fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin if (instr->is_shadow && comparator) { 3297fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin address[count++] = llvm_extract_elem(ctx, comparator, 0); 3298f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3299f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3300f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* pack derivatives */ 3301f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ddx || ddy) { 3302f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->sampler_dim) { 3303f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_3D: 3304f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_CUBE: 3305f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_deriv_comp = 3; 3306f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3307f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_2D: 3308f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 3309f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_deriv_comp = 2; 3310f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3311f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case GLSL_SAMPLER_DIM_1D: 3312f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_deriv_comp = 1; 3313f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3314f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3315f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3316f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < num_deriv_comp; i++) { 3317f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i)); 3318f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i)); 3319f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3320f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3321f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3322f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { 3323f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < instr->coord_components; chan++) 3324f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords[chan] = to_float(ctx, coords[chan]); 3325f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->coord_components == 3) 3326f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie coords[3] = LLVMGetUndef(ctx->f32); 3327a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle ac_prepare_cube_coords(&ctx->ac, 3328a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle instr->op == nir_texop_txd, instr->is_array, 3329a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle coords, derivs); 3330f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (num_deriv_comp) 3331f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_deriv_comp--; 3332f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3333f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3334f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ddx || ddy) { 3335f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < num_deriv_comp * 2; i++) 3336f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = derivs[i]; 3337f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3338f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3339f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Pack texture coordinates */ 3340f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (coord) { 3341f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = coords[0]; 33429ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie if (instr->coord_components > 1) { 33439ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { 33449ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie coords[1] = apply_round_slice(ctx, coords[1]); 33459ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie } 3346f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = coords[1]; 33479ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie } 3348f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->coord_components > 2) { 3349f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* This seems like a bit of a hack - but it passes Vulkan CTS with it */ 3350f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) { 33519ecfbafedb09565b6ac5735c7893b0f58a46b8dfDave Airlie coords[2] = apply_round_slice(ctx, coords[2]); 3352f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3353f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = coords[2]; 3354f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3355f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3356f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3357f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Pack LOD */ 3358f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) { 3359f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = lod; 3360f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (instr->op == nir_texop_txf_ms && sample_index) { 3361f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[count++] = sample_index; 3362f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if(instr->op == nir_texop_txs) { 3363f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie count = 0; 3364b1340fd708bb873617b8a529ac45cbc9507bd6c4Dave Airlie if (lod) 3365b1340fd708bb873617b8a529ac45cbc9507bd6c4Dave Airlie address[count++] = lod; 3366b1340fd708bb873617b8a529ac45cbc9507bd6c4Dave Airlie else 3367b1340fd708bb873617b8a529ac45cbc9507bd6c4Dave Airlie address[count++] = ctx->i32zero; 3368f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3369f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3370f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < count; chan++) { 3371f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[chan] = LLVMBuildBitCast(ctx->builder, 3372f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[chan], ctx->i32, ""); 3373f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3374f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3375f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->op == nir_texop_samples_identical) { 3376f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef txf_address[4]; 3377f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_tex_info txf_info = { 0 }; 3378f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned txf_count = count; 3379f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(txf_address, address, sizeof(txf_address)); 3380f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3381f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!instr->is_array) 3382f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txf_address[2] = ctx->i32zero; 3383f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txf_address[3] = ctx->i32zero; 3384f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3385f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf, 338663406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie fmask_ptr, NULL, 3387f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txf_address, txf_count, 0xf); 3388f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3389f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = build_tex_intrinsic(ctx, instr, &txf_info); 339067c91ef2a228b0843bdbb4b7b7128ef45e3dd71fDave Airlie 339167c91ef2a228b0843bdbb4b7b7128ef45e3dd71fDave Airlie result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); 3392d842546ad1ebdb4825f0cbca2d68a56139d88e2aDave Airlie result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero); 3393f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie goto write_result; 3394f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3395f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 339663406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie /* Adjust the sample index according to FMASK. 339763406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * 339863406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * For uncompressed MSAA surfaces, FMASK should return 0x76543210, 339963406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * which is the identity mapping. Each nibble says which physical sample 340063406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * should be fetched to get that sample. 340163406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * 340263406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * For example, 0x11111100 means there are only 2 samples stored and 340363406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * the second sample covers 3/4 of the pixel. When reading samples 0 340463406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * and 1, return physical sample 0 (determined by the first two 0s 340563406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * in FMASK), otherwise return physical sample 1. 340663406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * 340763406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * The sample index should be adjusted as follows: 340863406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie * sample_index = (fmask >> (sample_index * 4)) & 0xF; 340963406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie */ 34101e868de4500a46abde745a517ead42afd3a904caDave Airlie if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && 34111e868de4500a46abde745a517ead42afd3a904caDave Airlie instr->op != nir_texop_txs) { 3412f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef txf_address[4]; 3413f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_tex_info txf_info = { 0 }; 3414f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned txf_count = count; 3415f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(txf_address, address, sizeof(txf_address)); 3416f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3417f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!instr->is_array) 3418f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txf_address[2] = ctx->i32zero; 3419f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txf_address[3] = ctx->i32zero; 3420f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3421f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf, 342263406b669e74d9ff38a45f362ceac816a7dad668Dave Airlie fmask_ptr, NULL, 3423f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie txf_address, txf_count, 0xf); 3424f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3425f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = build_tex_intrinsic(ctx, instr, &txf_info); 3426f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); 3427f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); 3428f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3429f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, 3430f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result, 3431f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32zero, ""); 3432f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3433f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned sample_chan = instr->is_array ? 3 : 2; 3434f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3435f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef sample_index4 = 3436f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildMul(ctx->builder, address[sample_chan], four, ""); 3437f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef shifted_fmask = 3438f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildLShr(ctx->builder, fmask, sample_index4, ""); 3439f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef final_sample = 3440f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildAnd(ctx->builder, shifted_fmask, F, ""); 3441f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3442f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK 3443f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * resource descriptor is 0 (invalid), 3444f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 3445f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef fmask_desc = 3446f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBitCast(ctx->builder, fmask_ptr, 3447f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->v8i32, ""); 3448f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3449f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef fmask_word1 = 3450f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildExtractElement(ctx->builder, fmask_desc, 3451f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32one, ""); 3452f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3453f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef word1_is_nonzero = 3454f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildICmp(ctx->builder, LLVMIntNE, 3455f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fmask_word1, ctx->i32zero, ""); 3456f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3457f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Replace the MSAA sample index. */ 3458f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[sample_chan] = 3459f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildSelect(ctx->builder, word1_is_nonzero, 3460f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie final_sample, address[sample_chan], ""); 3461f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3462f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3463f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (offsets && instr->op == nir_texop_txf) { 3464f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_const_value *const_offset = 3465f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_src_as_const_value(instr->src[const_src].src); 3466bb8ac183404541ca8dee31563709d5aca8de0e73Dave Airlie int num_offsets = instr->src[const_src].src.ssa->num_components; 3467f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(const_offset); 3468bb8ac183404541ca8dee31563709d5aca8de0e73Dave Airlie num_offsets = MIN2(num_offsets, instr->coord_components); 3469bb8ac183404541ca8dee31563709d5aca8de0e73Dave Airlie if (num_offsets > 2) 3470f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[2] = LLVMBuildAdd(ctx->builder, 3471f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), ""); 3472bb8ac183404541ca8dee31563709d5aca8de0e73Dave Airlie if (num_offsets > 1) 3473f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[1] = LLVMBuildAdd(ctx->builder, 3474f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), ""); 3475f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[0] = LLVMBuildAdd(ctx->builder, 3476f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), ""); 3477f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3478f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3479f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3480f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* TODO TG4 support */ 3481f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->op == nir_texop_tg4) { 3482f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (instr->is_shadow) 3483f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie dmask = 1; 3484f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 3485f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie dmask = 1 << instr->component; 3486f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3487f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie set_tex_fetch_args(ctx, &tinfo, instr, instr->op, 3488f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res_ptr, samp_ptr, address, count, dmask); 3489f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3490f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = build_tex_intrinsic(ctx, instr, &tinfo); 3491f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3492b2e217369e1ca4bf9d7741721559a4506b1f0ce8Dave Airlie if (instr->op == nir_texop_query_levels) 3493f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), ""); 34948033f78f94c7c6349e1c6a4d63fe3accb34b36f1Dave Airlie else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4) 3495b2e217369e1ca4bf9d7741721559a4506b1f0ce8Dave Airlie result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); 3496f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (instr->op == nir_texop_txs && 3497f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && 3498f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie instr->is_array) { 3499f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); 3500f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); 3501f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, ""); 3502f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie z = LLVMBuildSDiv(ctx->builder, z, six, ""); 3503f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = LLVMBuildInsertElement(ctx->builder, result, z, two, ""); 3504d548fa882b865b8e5052954b872e392312c38ab8Dave Airlie } else if (instr->dest.ssa.num_components != 4) 3505d548fa882b865b8e5052954b872e392312c38ab8Dave Airlie result = trim_vector(ctx, result, instr->dest.ssa.num_components); 3506f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3507f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliewrite_result: 3508f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (result) { 3509f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(instr->dest.is_ssa); 3510f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie result = to_integer(ctx, result); 3511f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); 3512f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3513f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3514f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3515f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3516f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr) 3517f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3518f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); 3519f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, ""); 3520f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3521f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); 3522f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->phis, instr, result); 3523f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3524f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3525f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_post_phi(struct nir_to_llvm_context *ctx, 3526f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_phi_instr *instr, 3527f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef llvm_phi) 3528f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3529f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_phi_src(src, instr) { 3530f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef block = get_block(ctx, src->pred); 3531f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef llvm_src = get_src(ctx, src->src); 3532f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3533f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1); 3534f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3535f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3536f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3537f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void phi_post_pass(struct nir_to_llvm_context *ctx) 3538f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3539f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct hash_entry *entry; 3540f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie hash_table_foreach(ctx->phis, entry) { 3541f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_post_phi(ctx, (nir_phi_instr*)entry->key, 3542f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie (LLVMValueRef)entry->data); 3543f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3544f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3545f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3546f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3547f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_ssa_undef(struct nir_to_llvm_context *ctx, 3548f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_ssa_undef_instr *instr) 3549f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3550f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned num_components = instr->def.num_components; 3551f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef undef; 3552f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3553f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (num_components == 1) 3554f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie undef = LLVMGetUndef(ctx->i32); 3555f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else { 3556f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components)); 3557f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3558f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, &instr->def, undef); 3559f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3560f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3561f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_jump(struct nir_to_llvm_context *ctx, 3562f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_jump_instr *instr) 3563f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3564f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->type) { 3565f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_jump_break: 3566f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBr(ctx->builder, ctx->break_block); 3567f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMClearInsertionPosition(ctx->builder); 3568f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3569f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_jump_continue: 3570f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBr(ctx->builder, ctx->continue_block); 3571f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMClearInsertionPosition(ctx->builder); 3572f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3573f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 3574f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "Unknown NIR jump instr: "); 3575f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_print_instr(&instr->instr, stderr); 3576f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "\n"); 3577f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 3578f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3579f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3580f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3581f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_cf_list(struct nir_to_llvm_context *ctx, 3582f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct exec_list *list); 3583f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3584f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_block(struct nir_to_llvm_context *ctx, nir_block *block) 3585f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3586f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder); 3587f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_instr(instr, block) 3588f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie { 3589f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (instr->type) { 3590f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_alu: 3591f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_alu(ctx, nir_instr_as_alu(instr)); 3592f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3593f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_load_const: 3594f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_load_const(ctx, nir_instr_as_load_const(instr)); 3595f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3596f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_intrinsic: 3597f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 3598f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3599f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_tex: 3600f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_tex(ctx, nir_instr_as_tex(instr)); 3601f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3602f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_phi: 3603f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_phi(ctx, nir_instr_as_phi(instr)); 3604f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3605f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_ssa_undef: 3606f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 3607f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3608f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_instr_type_jump: 3609f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_jump(ctx, nir_instr_as_jump(instr)); 3610f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3611f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 3612f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "Unknown NIR instr type: "); 3613f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_print_instr(instr, stderr); 3614f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "\n"); 3615f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie abort(); 3616f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3617f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3618f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3619f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_hash_table_insert(ctx->defs, block, llvm_block); 3620f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3621f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3622f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt) 3623f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3624f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef value = get_src(ctx, if_stmt->condition); 3625f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3626f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef merge_block = 3627f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); 3628f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef if_block = 3629f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); 3630f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef else_block = merge_block; 3631f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!exec_list_is_empty(&if_stmt->else_list)) 3632f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else_block = LLVMAppendBasicBlockInContext( 3633f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->context, ctx->main_function, ""); 3634f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3635f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value, 3636f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstInt(ctx->i32, 0, false), ""); 3637f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildCondBr(ctx->builder, cond, if_block, else_block); 3638f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3639f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(ctx->builder, if_block); 3640f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_cf_list(ctx, &if_stmt->then_list); 3641f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (LLVMGetInsertBlock(ctx->builder)) 3642f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBr(ctx->builder, merge_block); 3643f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3644f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!exec_list_is_empty(&if_stmt->else_list)) { 3645f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(ctx->builder, else_block); 3646f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_cf_list(ctx, &if_stmt->else_list); 3647f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (LLVMGetInsertBlock(ctx->builder)) 3648f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBr(ctx->builder, merge_block); 3649f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3650f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3651f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(ctx->builder, merge_block); 3652f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3653f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3654f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop) 3655f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3656f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef continue_parent = ctx->continue_block; 3657f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef break_parent = ctx->break_block; 3658f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3659f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->continue_block = 3660f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); 3661f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->break_block = 3662f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); 3663f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3664f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBr(ctx->builder, ctx->continue_block); 3665f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block); 3666f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_cf_list(ctx, &loop->body); 3667f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3668f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (LLVMGetInsertBlock(ctx->builder)) 3669f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildBr(ctx->builder, ctx->continue_block); 3670f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block); 3671f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3672f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->continue_block = continue_parent; 3673f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->break_block = break_parent; 3674f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3675f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3676f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void visit_cf_list(struct nir_to_llvm_context *ctx, 3677f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct exec_list *list) 3678f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3679f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie foreach_list_typed(nir_cf_node, node, node, list) 3680f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie { 3681f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (node->type) { 3682f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_cf_node_block: 3683f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_block(ctx, nir_cf_node_as_block(node)); 3684f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3685f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3686f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_cf_node_if: 3687f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_if(ctx, nir_cf_node_as_if(node)); 3688f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3689f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3690f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case nir_cf_node_loop: 3691f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_loop(ctx, nir_cf_node_as_loop(node)); 3692f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3693f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3694f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 3695f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie assert(0); 3696f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3697f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3698f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3699f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3700f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3701f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliehandle_vs_input_decl(struct nir_to_llvm_context *ctx, 3702f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_variable *variable) 3703f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3704f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef t_list_ptr = ctx->vertex_buffers; 3705f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef t_offset; 3706f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef t_list; 3707f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[3]; 3708f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef input; 3709f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef buffer_index; 3710f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int index = variable->data.location - VERT_ATTRIB_GENERIC0; 3711f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx = variable->data.location; 3712f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); 3713f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3714f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie variable->data.driver_location = idx * 4; 3715f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3716f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) { 3717f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id, 3718f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->start_instance, ""); 3719f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, 3720f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.vgpr_comp_cnt); 3721f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 3722f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id, 3723f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->base_vertex, ""); 3724f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3725f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < attrib_count; ++i, ++idx) { 3726f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie t_offset = LLVMConstInt(ctx->i32, index + i, false); 3727f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3728f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset); 3729f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = t_list; 3730f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = LLVMConstInt(ctx->i32, 0, false); 3731f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = buffer_index; 37322c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle input = ac_emit_llvm_intrinsic(&ctx->ac, 3733f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "llvm.SI.vs.load.input", ctx->v4f32, args, 3, 37342fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); 3735f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3736f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 3737f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); 3738f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = 3739f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie to_integer(ctx, LLVMBuildExtractElement(ctx->builder, 3740f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie input, llvm_chan, "")); 3741f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3742f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3743f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3744f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3745f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3746f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void interp_fs_input(struct nir_to_llvm_context *ctx, 3747f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned attr, 3748f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef interp_param, 3749f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef prim_mask, 3750f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef result[4]) 3751f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3752f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *intr_name; 3753f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef attr_number; 3754f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned chan; 3755f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3756f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie attr_number = LLVMConstInt(ctx->i32, attr, false); 3757f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3758f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* fs.constant returns the param from the middle vertex, so it's not 3759f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * really useful for flat shading. It's meant to be used for custom 3760f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * interpolation (but the intrinsic can't fetch from the other two 3761f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * vertices). 3762f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * 3763f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state 3764f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * to do the right thing. The only reason we use fs.constant is that 3765f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * fs.interp cannot be used on integers, because they can be equal 3766f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * to NaN. 3767f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie */ 3768f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; 3769f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3770f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (chan = 0; chan < 4; chan++) { 3771f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[4]; 3772f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); 3773f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3774f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = llvm_chan; 3775f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = attr_number; 3776f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = prim_mask; 3777f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[3] = interp_param; 37782c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name, 3779f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->f32, args, args[3] ? 4 : 3, 37802fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); 3781f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3782f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3783f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3784f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3785f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliehandle_fs_input_decl(struct nir_to_llvm_context *ctx, 3786f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_variable *variable) 3787f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3788f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx = variable->data.location; 3789f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 3790f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef interp; 3791f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3792f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie variable->data.driver_location = idx * 4; 3793f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; 3794f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3795f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { 3796f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie unsigned interp_type; 3797f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie if (variable->data.sample) { 3798f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie interp_type = INTERP_SAMPLE; 3799f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie ctx->shader_info->fs.force_persample = true; 3800f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie } else if (variable->data.centroid) 3801f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie interp_type = INTERP_CENTROID; 3802f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie else 3803f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie interp_type = INTERP_CENTER; 3804f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie 3805f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type); 3806f3a3fea973a145fe16f70866dcfc22c3c5322a91Dave Airlie } else 3807f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie interp = NULL; 3808f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3809f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < attrib_count; ++i) 3810f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp; 3811f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3812f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3813f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3814f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3815f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliehandle_shader_input_decl(struct nir_to_llvm_context *ctx, 3816f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_variable *variable) 3817f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3818f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (ctx->stage) { 3819f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_VERTEX: 3820f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie handle_vs_input_decl(ctx, variable); 3821f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3822f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_FRAGMENT: 3823f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie handle_fs_input_decl(ctx, variable); 3824f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3825f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 3826f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 3827f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3828f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3829f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3830f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3831f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3832f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliehandle_fs_inputs_pre(struct nir_to_llvm_context *ctx, 3833f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_shader *nir) 3834f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3835f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned index = 0; 3836f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) { 3837f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef interp_param; 3838f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0); 3839f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3840f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!(ctx->input_mask & (1ull << i))) 3841f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 3842f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3843f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) { 3844f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie interp_param = *inputs; 3845f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie interp_fs_input(ctx, index, interp_param, ctx->prim_mask, 3846f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie inputs); 3847f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3848f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!interp_param) 3849f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.flat_shaded_mask |= 1u << index; 3850f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ++index; 3851f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (i == VARYING_SLOT_POS) { 3852f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for(int i = 0; i < 3; ++i) 3853f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie inputs[i] = ctx->frag_pos[i]; 3854f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 38551007047ca1086eef89f65c87e7ea2371c4f80513Nicolai Hähnle inputs[3] = ac_emit_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]); 3856f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3857f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3858f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.num_interp = index; 3859f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->input_mask & (1 << VARYING_SLOT_PNTC)) 3860f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.has_pcoord = true; 3861f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0; 3862f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3863f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3864f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef 3865f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieac_build_alloca(struct nir_to_llvm_context *ctx, 3866f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type, 3867f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *name) 3868f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3869f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuilderRef builder = ctx->builder; 3870f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); 3871f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef function = LLVMGetBasicBlockParent(current_block); 3872f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); 3873f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); 3874f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context); 3875f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef res; 3876f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3877f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (first_instr) { 3878f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderBefore(first_builder, first_instr); 3879f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 3880f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPositionBuilderAtEnd(first_builder, first_block); 3881f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3882f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3883f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie res = LLVMBuildAlloca(first_builder, type, name); 3884f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildStore(builder, LLVMConstNull(type), res); 3885f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3886f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDisposeBuilder(first_builder); 3887f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3888f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return res; 3889f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3890f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3891f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx, 3892f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef type, 3893f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *name) 3894f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3895f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef ptr = ac_build_alloca(ctx, type, name); 3896f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr); 3897f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ptr; 3898f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3899f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3900f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3901f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliehandle_shader_output_decl(struct nir_to_llvm_context *ctx, 3902f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_variable *variable) 3903f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 390427a8aab882980e35b66f7f318fd2fd7b500401b4Fredrik Höglund int idx = variable->data.location + variable->data.index; 3905f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 3906f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3907f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie variable->data.driver_location = idx * 4; 3908f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3909f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage == MESA_SHADER_VERTEX) { 3910f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3911f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (idx == VARYING_SLOT_CLIP_DIST0 || 3912f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie idx == VARYING_SLOT_CULL_DIST0) { 3913f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int length = glsl_get_length(variable->type); 3914f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (idx == VARYING_SLOT_CLIP_DIST0) { 3915f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1; 3916f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->num_clips = length; 3917f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (idx == VARYING_SLOT_CULL_DIST0) { 3918f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1; 3919f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->num_culls = length; 3920f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3921f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (length > 4) 3922f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie attrib_count = 2; 3923f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else 3924f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie attrib_count = 1; 3925f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3926f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3927f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3928f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < attrib_count; ++i) { 3929f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 3930f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] = 3931f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_build_alloca_undef(ctx, ctx->f32, ""); 3932f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3933f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 393427a8aab882980e35b66f7f318fd2fd7b500401b4Fredrik Höglund ctx->output_mask |= ((1ull << attrib_count) - 1) << idx; 3935f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3936f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3937f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3938f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliesetup_locals(struct nir_to_llvm_context *ctx, 3939f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_function *func) 3940f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3941f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int i, j; 3942f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->num_locals = 0; 3943f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_variable(variable, &func->impl->locals) { 3944f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 3945f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie variable->data.driver_location = ctx->num_locals * 4; 3946f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->num_locals += attrib_count; 3947f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3948f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); 3949f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!ctx->locals) 3950f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return; 3951f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3952f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < ctx->num_locals; i++) { 3953f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (j = 0; j < 4; j++) { 3954f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->locals[i * 4 + j] = 3955f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_build_alloca_undef(ctx, ctx->f32, "temp"); 3956f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3957f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 3958f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3959f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3960f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef 3961f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieemit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi) 3962f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3963f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie v = to_float(ctx, v); 3964f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo)); 3965f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi)); 3966f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3967f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3968f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3969f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx, 3970f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef src0, LLVMValueRef src1) 3971f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3972f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); 3973f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef comp[2]; 3974f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3975f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), ""); 3976f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), ""); 3977f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, ""); 3978f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return LLVMBuildOr(ctx->builder, comp[0], comp[1], ""); 3979f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 3980f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3981f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie/* Initialize arguments for the shader export intrinsic */ 3982f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 3983f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliesi_llvm_init_export_args(struct nir_to_llvm_context *ctx, 3984f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *values, 3985f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned target, 3986f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *args) 3987f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 3988f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Default is 0xf. Adjusted below depending on the format. */ 3989f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false); 3990f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Specify whether the EXEC mask represents the valid mask */ 3991f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = LLVMConstInt(ctx->i32, 0, false); 3992f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3993f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Specify whether this is the last export */ 3994f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = LLVMConstInt(ctx->i32, 0, false); 3995f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Specify the target we are exporting */ 3996f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[3] = LLVMConstInt(ctx->i32, target, false); 3997f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 3998f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */ 3999f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = LLVMGetUndef(ctx->f32); 4000f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = LLVMGetUndef(ctx->f32); 4001f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[7] = LLVMGetUndef(ctx->f32); 4002f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[8] = LLVMGetUndef(ctx->f32); 4003f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4004f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!values) 4005f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return; 4006f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4007f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { 4008f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef val[4]; 4009f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned index = target - V_008DFC_SQ_EXP_MRT; 4010f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; 4011f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; 4012f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4013f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch(col_format) { 4014f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_ZERO: 4015f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstInt(ctx->i32, 0x0, 0); 4016f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0); 4017f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4018f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4019f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_32_R: 4020f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstInt(ctx->i32, 0x1, 0); 4021f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = values[0]; 4022f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4023f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4024f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_32_GR: 4025f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstInt(ctx->i32, 0x3, 0); 4026f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = values[0]; 4027f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = values[1]; 4028f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4029f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4030f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_32_AR: 4031f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstInt(ctx->i32, 0x9, 0); 4032f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = values[0]; 4033f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[8] = values[3]; 4034f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4035f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4036f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_FP16_ABGR: 4037f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = ctx->i32one; 4038f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4039f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 2; chan++) { 4040f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef pack_args[2] = { 4041f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[2 * chan], 4042f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[2 * chan + 1] 4043f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie }; 4044f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef packed; 4045f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 40462c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16", 4047f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32, pack_args, 2, 40482fdaf38c0106d87f89fa20fbe229e66d9a3ad14aDave Airlie AC_FUNC_ATTR_READNONE); 4049f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[chan + 5] = packed; 4050f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4051f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4052f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4053f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_UNORM16_ABGR: 4054f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 4055f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = emit_float_saturate(ctx, values[chan], 0, 1); 4056f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = LLVMBuildFMul(ctx->builder, val[chan], 4057f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstReal(ctx->f32, 65535), ""); 4058f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], 4059f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstReal(ctx->f32, 0.5), ""); 4060f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], 4061f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->i32, ""); 4062f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4063f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4064f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = ctx->i32one; 4065f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = emit_pack_int16(ctx, val[0], val[1]); 4066f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = emit_pack_int16(ctx, val[2], val[3]); 4067f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4068f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4069f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_SNORM16_ABGR: 4070f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 4071f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = emit_float_saturate(ctx, values[chan], -1, 1); 4072f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = LLVMBuildFMul(ctx->builder, val[chan], 4073f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstReal(ctx->f32, 32767), ""); 4074f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4075f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* If positive, add 0.5, else add -0.5. */ 4076f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], 4077f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildSelect(ctx->builder, 4078f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildFCmp(ctx->builder, LLVMRealOGE, 4079f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan], ctx->f32zero, ""), 4080f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstReal(ctx->f32, 0.5), 4081f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMConstReal(ctx->f32, -0.5), ""), ""); 4082f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, ""); 4083f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4084f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4085f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = ctx->i32one; 4086f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = emit_pack_int16(ctx, val[0], val[1]); 4087f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = emit_pack_int16(ctx, val[2], val[3]); 4088f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4089f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4090f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_UINT16_ABGR: { 4091f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0); 4092f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4093f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 4094f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = to_integer(ctx, values[chan]); 4095f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max); 4096f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4097f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4098f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = ctx->i32one; 4099f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = emit_pack_int16(ctx, val[0], val[1]); 4100f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = emit_pack_int16(ctx, val[2], val[3]); 4101f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4102f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4103f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4104f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_SINT16_ABGR: { 4105f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0); 4106f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0); 4107f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4108f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Clamp. */ 4109f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned chan = 0; chan < 4; chan++) { 4110f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = to_integer(ctx, values[chan]); 4111f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max); 4112f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min); 4113f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4114f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4115f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = ctx->i32one; 4116f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = emit_pack_int16(ctx, val[0], val[1]); 4117f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = emit_pack_int16(ctx, val[2], val[3]); 4118f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4119f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4120f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4121f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 4122f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case V_028714_SPI_SHADER_32_ABGR: 4123f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(&args[5], values, sizeof(values[0]) * 4); 4124f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4125f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4126f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else 4127f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(&args[5], values, sizeof(values[0]) * 4); 4128f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4129f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 5; i < 9; ++i) 4130f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[i] = to_float(ctx, args[i]); 4131f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4132f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4133f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 4134bd56de88dfb192310f3432a3c0e0ddc3469c6d55Dave Airliehandle_vs_outputs_post(struct nir_to_llvm_context *ctx) 4135f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4136f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie uint32_t param_count = 0; 4137f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned target; 4138f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned pos_idx, num_pos_exports = 0; 4139f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[9]; 4140f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef pos_args[4][9] = { { 0 } }; 41416b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; 4142f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int i; 4143f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) | 4144f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie (1ull << VARYING_SLOT_CLIP_DIST1) | 4145f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie (1ull << VARYING_SLOT_CULL_DIST0) | 4146f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie (1ull << VARYING_SLOT_CULL_DIST1)); 4147f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4148f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (clip_mask) { 4149f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef slots[8]; 4150f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned j; 4151f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4152f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->shader_info->vs.cull_dist_mask) 4153f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips; 4154f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4155f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie i = VARYING_SLOT_CLIP_DIST0; 4156f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (j = 0; j < ctx->num_clips; j++) 4157f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, 4158f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 4159f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie i = VARYING_SLOT_CULL_DIST0; 4160f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (j = 0; j < ctx->num_culls; j++) 4161f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder, 4162f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 4163f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4164f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = ctx->num_clips + ctx->num_culls; i < 8; i++) 4165f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie slots[i] = LLVMGetUndef(ctx->f32); 4166f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4167f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->num_clips + ctx->num_culls > 4) { 4168f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie target = V_008DFC_SQ_EXP_POS + 3; 4169f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_llvm_init_export_args(ctx, &slots[4], target, args); 4170f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], 4171f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args, sizeof(args)); 4172f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4173f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4174f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie target = V_008DFC_SQ_EXP_POS + 2; 4175f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_llvm_init_export_args(ctx, &slots[0], target, args); 4176f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], 4177f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args, sizeof(args)); 4178f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4179f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4180f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4181f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 4182f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef values[4]; 4183f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!(ctx->output_mask & (1ull << i))) 4184f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 4185f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4186f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned j = 0; j < 4; j++) 4187f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, 4188f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 4189f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4190f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (i == VARYING_SLOT_POS) { 4191f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie target = V_008DFC_SQ_EXP_POS; 4192f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (i == VARYING_SLOT_CLIP_DIST0 || 4193f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie i == VARYING_SLOT_CLIP_DIST1 || 4194f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie i == VARYING_SLOT_CULL_DIST0 || 4195f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie i == VARYING_SLOT_CULL_DIST1) { 4196f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 4197f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (i == VARYING_SLOT_PSIZ) { 4198f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.writes_pointsize = true; 4199f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie psize_value = values[0]; 4200f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 42016b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie } else if (i == VARYING_SLOT_LAYER) { 42026b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie ctx->shader_info->vs.writes_layer = true; 42036b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie layer_value = values[0]; 42046b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie continue; 42056b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie } else if (i == VARYING_SLOT_VIEWPORT) { 42066b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie ctx->shader_info->vs.writes_viewport_index = true; 42076b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie viewport_index_value = values[0]; 42086b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie continue; 4209f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (i >= VARYING_SLOT_VAR0) { 4210f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0); 4211f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie target = V_008DFC_SQ_EXP_PARAM + param_count; 4212f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie param_count++; 4213f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4214f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4215f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_llvm_init_export_args(ctx, values, target, args); 4216f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4217f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (target >= V_008DFC_SQ_EXP_POS && 4218f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie target <= (V_008DFC_SQ_EXP_POS + 3)) { 4219f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], 4220f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args, sizeof(args)); 4221f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 42222c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, 4223d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie "llvm.SI.export", 4224d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie ctx->voidt, 4225d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie args, 9, 0); 4226f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4227f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4228f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4229f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* We need to add the position output manually if it's missing. */ 4230f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!pos_args[0][0]) { 4231f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false); 4232f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][1] = ctx->i32zero; /* EXEC mask */ 4233f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][2] = ctx->i32zero; /* last export? */ 4234f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false); 4235f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][4] = ctx->i32zero; /* COMPR flag */ 4236f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][5] = ctx->f32zero; /* X */ 4237f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][6] = ctx->f32zero; /* Y */ 4238f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][7] = ctx->f32zero; /* Z */ 4239f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[0][8] = ctx->f32one; /* W */ 4240f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4241f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 42426b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) | 42436b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie (ctx->shader_info->vs.writes_layer == true ? 4 : 0) | 42446b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0)); 42456b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie if (mask) { 42466b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* writemask */ 4247f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][1] = ctx->i32zero; /* EXEC mask */ 4248f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][2] = ctx->i32zero; /* last export? */ 4249f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false); 4250f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][4] = ctx->i32zero; /* COMPR flag */ 4251f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][5] = ctx->f32zero; /* X */ 4252f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][6] = ctx->f32zero; /* Y */ 4253f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][7] = ctx->f32zero; /* Z */ 4254f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][8] = ctx->f32zero; /* W */ 4255f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4256f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->shader_info->vs.writes_pointsize == true) 4257f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[1][5] = psize_value; 42586b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie if (ctx->shader_info->vs.writes_layer == true) 42596b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie pos_args[1][7] = layer_value; 42606b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie if (ctx->shader_info->vs.writes_viewport_index == true) 42616b635bbe16c93ad13afa3390d20c2f0f033e065dDave Airlie pos_args[1][8] = viewport_index_value; 4262f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4263f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < 4; i++) { 4264f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (pos_args[i][0]) 4265f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_pos_exports++; 4266f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4267f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4268f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_idx = 0; 4269f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (i = 0; i < 4; i++) { 4270f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!pos_args[i][0]) 4271f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 4272f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4273f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Specify the target we are exporting */ 4274f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false); 4275f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (pos_idx == num_pos_exports) 4276f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie pos_args[i][2] = ctx->i32one; 42772c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, 4278d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie "llvm.SI.export", 4279d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie ctx->voidt, 4280d4392a877c0d4c930e8d2a016ae2c8b801e49c49Dave Airlie pos_args[i], 9, 0); 4281f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4282f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4283f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.pos_exports = num_pos_exports; 4284f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->vs.param_exports = param_count; 4285f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4286f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4287f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 4288f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliesi_export_mrt_color(struct nir_to_llvm_context *ctx, 4289f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef *color, unsigned param, bool is_last) 4290f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4291f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[9]; 4292f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Export */ 4293f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_llvm_init_export_args(ctx, color, param, 4294f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args); 4295f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4296f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (is_last) { 4297f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = ctx->i32one; /* whether the EXEC mask is valid */ 4298f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = ctx->i32one; /* DONE bit */ 4299f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (args[0] == ctx->i32zero) 4300f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return; /* unnecessary NULL export */ 4301f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 43022c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", 4303f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->voidt, args, 9, 0); 4304f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4305f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4306f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 4307f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliesi_export_mrt_z(struct nir_to_llvm_context *ctx, 4308f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef depth, LLVMValueRef stencil, 4309f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef samplemask) 4310f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4311f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef args[9]; 4312f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned mask = 0; 4313f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[1] = ctx->i32one; /* whether the EXEC mask is valid */ 4314f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[2] = ctx->i32one; /* DONE bit */ 4315f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Specify the target we are exporting */ 4316f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false); 4317f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4318f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[4] = ctx->i32zero; /* COMP flag */ 4319f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = LLVMGetUndef(ctx->f32); /* R, depth */ 4320f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ 4321f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */ 4322f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ 4323f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4324f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (depth) { 4325f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[5] = depth; 4326f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask |= 0x1; 4327f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4328f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4329f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (stencil) { 4330f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[6] = stencil; 4331f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask |= 0x2; 4332f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4333f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4334f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (samplemask) { 4335f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[7] = samplemask; 4336f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask |= 0x04; 4337f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4338f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4339f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* SI (except OLAND) has a bug that it only looks 4340f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie * at the X writemask component. */ 4341f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (ctx->options->chip_class == SI && 4342f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->options->family != CHIP_OLAND) 4343f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mask |= 0x01; 4344f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4345f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie args[0] = LLVMConstInt(ctx->i32, mask, false); 43462c9d26a3564c215695758b4d6b44a838b7a286d4Nicolai Hähnle ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", 4347f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->voidt, args, 9, 0); 4348f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4349f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4350f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 4351bd56de88dfb192310f3432a3c0e0ddc3469c6d55Dave Airliehandle_fs_outputs_post(struct nir_to_llvm_context *ctx) 4352f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4353f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned index = 0; 4354f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; 4355f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4356f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 4357f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef values[4]; 4358bafc75b4370bfbec0c91ff6bb4d4972fb37bb22aDave Airlie 4359f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (!(ctx->output_mask & (1ull << i))) 4360f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie continue; 4361f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4362f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (i == FRAG_RESULT_DEPTH) { 4363f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.writes_z = true; 4364f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie depth = to_float(ctx, LLVMBuildLoad(ctx->builder, 4365f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); 4366f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else if (i == FRAG_RESULT_STENCIL) { 4367f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.writes_stencil = true; 4368f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie stencil = to_float(ctx, LLVMBuildLoad(ctx->builder, 4369f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); 4370f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } else { 4371bafc75b4370bfbec0c91ff6bb4d4972fb37bb22aDave Airlie bool last = false; 4372f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (unsigned j = 0; j < 4; j++) 4373f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, 4374f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 4375f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4376bafc75b4370bfbec0c91ff6bb4d4972fb37bb22aDave Airlie if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil) 4377bafc75b4370bfbec0c91ff6bb4d4972fb37bb22aDave Airlie last = ctx->output_mask <= ((1ull << (i + 1)) - 1); 4378bafc75b4370bfbec0c91ff6bb4d4972fb37bb22aDave Airlie 4379f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last); 4380f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie index++; 4381f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4382f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4383f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4384f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (depth || stencil) 4385f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_export_mrt_z(ctx, depth, stencil, samplemask); 4386f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie else if (!index) 4387f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true); 4388f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4389f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0; 4390f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4391f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4392f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 4393bd56de88dfb192310f3432a3c0e0ddc3469c6d55Dave Airliehandle_shader_outputs_post(struct nir_to_llvm_context *ctx) 4394f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4395f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie switch (ctx->stage) { 4396f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_VERTEX: 4397bd56de88dfb192310f3432a3c0e0ddc3469c6d55Dave Airlie handle_vs_outputs_post(ctx); 4398f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4399f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie case MESA_SHADER_FRAGMENT: 4400bd56de88dfb192310f3432a3c0e0ddc3469c6d55Dave Airlie handle_fs_outputs_post(ctx); 4401f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4402f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie default: 4403f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie break; 4404f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4405f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4406f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4407f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void 4408f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliehandle_shared_compute_var(struct nir_to_llvm_context *ctx, 4409f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_variable *variable, uint32_t *offset, int idx) 4410f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4411f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned size = glsl_count_attribute_slots(variable->type, false); 4412f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie variable->data.driver_location = *offset; 4413f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *offset += size; 4414f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4415f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4416f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) 4417f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4418f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMPassManagerRef passmgr; 4419f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Create the pass manager */ 4420f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie passmgr = LLVMCreateFunctionPassManagerForModule( 4421f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx->module); 4422f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4423f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* This pass should eliminate all the load and store instructions */ 4424f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddPromoteMemoryToRegisterPass(passmgr); 4425f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4426f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Add some optimization passes */ 4427f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddScalarReplAggregatesPass(passmgr); 4428f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddLICMPass(passmgr); 4429f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddAggressiveDCEPass(passmgr); 4430f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddCFGSimplificationPass(passmgr); 4431f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMAddInstructionCombiningPass(passmgr); 4432f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4433f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Run the pass */ 4434f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMInitializeFunctionPassManager(passmgr); 4435f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMRunFunctionPassManager(passmgr, ctx->main_function); 4436f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMFinalizeFunctionPassManager(passmgr); 4437f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4438f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDisposeBuilder(ctx->builder); 4439f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDisposePassManager(passmgr); 4440f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4441f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4442f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic 4443f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave AirlieLLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, 4444f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_shader *nir, 4445f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_shader_variant_info *shader_info, 4446f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const struct ac_nir_compiler_options *options) 4447f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4448f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_to_llvm_context ctx = {0}; 4449f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct nir_function *func; 4450ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie unsigned i; 4451f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.options = options; 4452f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.shader_info = shader_info; 4453f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.context = LLVMContextCreate(); 4454f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); 4455f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4456a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle ac_llvm_context_init(&ctx.ac, ctx.context); 4457a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle ctx.ac.module = ctx.module; 4458a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle 44595697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie ctx.has_ds_bpermute = ctx.options->chip_class >= VI; 44605697cfb7ec08e827a48adc2cd34364696e209147Dave Airlie 4461f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memset(shader_info, 0, sizeof(*shader_info)); 4462f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4463f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetTarget(ctx.module, "amdgcn--"); 4464ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie 4465ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); 4466ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); 4467ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie LLVMSetDataLayout(ctx.module, data_layout_str); 4468ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie LLVMDisposeTargetData(data_layout); 4469ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie LLVMDisposeMessage(data_layout_str); 4470ed5c3fad3778d338f0f886e87381bd9e1d3737cfDave Airlie 4471f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie setup_types(&ctx); 4472f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4473f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.builder = LLVMCreateBuilderInContext(ctx.context); 4474a0ce09b4b2a3063e49a02de3d12096cf462d10a3Nicolai Hähnle ctx.ac.builder = ctx.builder; 4475f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.stage = nir->stage; 4476f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4477ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie for (i = 0; i < AC_UD_MAX_SETS; i++) 4478ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; 4479ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie for (i = 0; i < AC_UD_MAX_UD; i++) 4480ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; 4481ae61ddabe8cfa1e2c8c55c478a7ea70563f64b63Dave Airlie 4482c46c376977275a3327c42ad30ec4df4cb7a4b060Dave Airlie create_function(&ctx); 4483f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4484f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (nir->stage == MESA_SHADER_COMPUTE) { 4485f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int num_shared = 0; 4486f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_variable(variable, &nir->shared) 4487f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie num_shared++; 4488f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (num_shared) { 4489f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int idx = 0; 4490f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie uint32_t shared_size = 0; 4491f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMValueRef var; 4492f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE); 4493f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_variable(variable, &nir->shared) { 4494f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie handle_shared_compute_var(&ctx, variable, &shared_size, idx); 4495f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie idx++; 4496f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4497f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4498e789af4a9f10e56f908e3c55c6d764d62c7838deDave Airlie shared_size *= 16; 4499f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie var = LLVMAddGlobalInAddressSpace(ctx.module, 4500f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMArrayType(ctx.i8, shared_size), 4501f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie "compute_lds", 4502f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LOCAL_ADDR_SPACE); 4503f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMSetAlignment(var, 4); 4504f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, ""); 4505f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4506f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4507f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4508f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_variable(variable, &nir->inputs) 4509f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie handle_shader_input_decl(&ctx, variable); 4510f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4511f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (nir->stage == MESA_SHADER_FRAGMENT) 4512f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie handle_fs_inputs_pre(&ctx, nir); 4513f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4514f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie nir_foreach_variable(variable, &nir->outputs) 4515f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie handle_shader_output_decl(&ctx, variable); 4516f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4517f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 4518f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_key_pointer_equal); 4519f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 4520f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie _mesa_key_pointer_equal); 4521f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4522f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie func = (struct nir_function *)exec_list_get_head(&nir->functions); 4523f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4524f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie setup_locals(&ctx, func); 4525f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4526f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie visit_cf_list(&ctx, &func->impl->body); 4527f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie phi_post_pass(&ctx); 4528f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4529bd56de88dfb192310f3432a3c0e0ddc3469c6d55Dave Airlie handle_shader_outputs_post(&ctx); 4530f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBuildRetVoid(ctx.builder); 4531f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4532f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ac_llvm_finalize_module(&ctx); 4533f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie free(ctx.locals); 4534f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ralloc_free(ctx.defs); 4535f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ralloc_free(ctx.phis); 4536f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4537f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return ctx.module; 4538f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4539f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4540f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) 4541f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4542f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned *retval = (unsigned *)context; 4543f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); 4544f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie char *description = LLVMGetDiagInfoDescription(di); 4545f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4546f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (severity == LLVMDSError) { 4547f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie *retval = 1; 4548f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", 4549f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie description); 4550f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4551f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4552f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDisposeMessage(description); 4553f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4554f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4555f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airliestatic unsigned ac_llvm_compile(LLVMModuleRef M, 4556f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie struct ac_shader_binary *binary, 4557f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMTargetMachineRef tm) 4558f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4559f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned retval = 0; 4560f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie char *err; 4561f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMContextRef llvm_ctx; 4562f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMMemoryBufferRef out_buffer; 4563f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie unsigned buffer_size; 4564f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie const char *buffer_data; 4565f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMBool mem_err; 4566f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4567f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Setup Diagnostic Handler*/ 4568f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie llvm_ctx = LLVMGetModuleContext(M); 4569f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4570f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, 4571f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &retval); 4572f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4573f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Compile IR*/ 4574f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, 4575f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie &err, &out_buffer); 4576f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4577f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Process Errors/Warnings */ 4578f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (mem_err) { 4579f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "%s: %s", __FUNCTION__, err); 4580f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie free(err); 4581f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie retval = 1; 4582f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie goto out; 4583f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4584f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4585f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Extract Shader Code*/ 4586f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie buffer_size = LLVMGetBufferSize(out_buffer); 4587f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie buffer_data = LLVMGetBufferStart(out_buffer); 4588f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4589f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ac_elf_read(buffer_data, buffer_size, binary); 4590f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4591f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* Clean up */ 4592f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDisposeMemoryBuffer(out_buffer); 4593f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4594f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlieout: 4595f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie return retval; 4596f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4597f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4598788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airliestatic void ac_compile_llvm_module(LLVMTargetMachineRef tm, 4599788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie LLVMModuleRef llvm_module, 4600788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct ac_shader_binary *binary, 4601788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct ac_shader_config *config, 4602788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct ac_shader_variant_info *shader_info, 4603788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie gl_shader_stage stage, 4604788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie bool dump_shader) 4605f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie{ 4606f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (dump_shader) 4607f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDumpModule(llvm_module); 4608f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4609f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie memset(binary, 0, sizeof(*binary)); 4610f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie int v = ac_llvm_compile(llvm_module, binary, tm); 4611f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (v) { 4612f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "compile failed\n"); 4613f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4614f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4615f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (dump_shader) 4616f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); 4617f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4618f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie ac_shader_binary_read_config(binary, config, 0); 4619f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4620f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); 4621f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMDisposeModule(llvm_module); 4622f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie LLVMContextDispose(ctx); 4623f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4624788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie if (stage == MESA_SHADER_FRAGMENT) { 4625f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs = 0; 4626f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) 4627f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 2; 4628f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) 4629f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 2; 4630f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) 4631f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 2; 4632f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) 4633f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 3; 4634f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) 4635f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 2; 4636f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) 4637f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 2; 4638f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) 4639f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 2; 4640f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) 4641f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4642f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) 4643f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4644f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) 4645f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4646f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) 4647f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4648f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) 4649f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4650f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) 4651f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4652f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) 4653f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4654f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) 4655f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4656f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) 4657f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_vgprs += 1; 4658f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie } 4659f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); 4660f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie 4661f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie /* +3 for scratch wave offset and VCC */ 4662f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie config->num_sgprs = MAX2(config->num_sgprs, 4663f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie shader_info->num_input_sgprs + 3); 4664788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie} 4665788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie 4666788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlievoid ac_compile_nir_shader(LLVMTargetMachineRef tm, 4667788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct ac_shader_binary *binary, 4668788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct ac_shader_config *config, 4669788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct ac_shader_variant_info *shader_info, 4670788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie struct nir_shader *nir, 4671788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie const struct ac_nir_compiler_options *options, 4672788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie bool dump_shader) 4673788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie{ 4674788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie 4675788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info, 4676788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie options); 46775dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie 4678788610081198260d6974f86ed62a4b9aaf59b8c4Dave Airlie ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader); 46795dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie switch (nir->stage) { 46805dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie case MESA_SHADER_COMPUTE: 4681f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie for (int i = 0; i < 3; ++i) 4682e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri shader_info->cs.block_size[i] = nir->info->cs.local_size[i]; 46835dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie break; 46845dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie case MESA_SHADER_FRAGMENT: 4685e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests; 46865dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie break; 46875dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie default: 46885dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie break; 46895dadd7ca27da6cd5bbac95c8e09130ec4a384e2bDave Airlie } 4690f4e499ec79147f4172f3669ae9dafd941aaeeb65Dave Airlie} 4691