12faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott/* 22faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * Copyright © 2010 Intel Corporation 32faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * 42faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * Permission is hereby granted, free of charge, to any person obtaining a 52faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * copy of this software and associated documentation files (the "Software"), 62faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * to deal in the Software without restriction, including without limitation 72faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * the rights to use, copy, modify, merge, publish, distribute, sublicense, 82faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * and/or sell copies of the Software, and to permit persons to whom the 92faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * Software is furnished to do so, subject to the following conditions: 102faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * 112faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * The above copyright notice and this permission notice (including the next 122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * paragraph) shall be included in all copies or substantial portions of the 132faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * Software. 142faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * 152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 162faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 172faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 182faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 192faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 202faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 212faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * IN THE SOFTWARE. 222faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott */ 232faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 24eb63640c1d38a200a7b1540405051d3ff79d0d8aEmil Velikov#include "compiler/glsl/ir.h" 252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott#include "brw_fs.h" 26854c4d8b37416d3e5593099a8e5441f3cf861173Francisco Jerez#include "brw_fs_surface_builder.h" 2741d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand#include "brw_nir.h" 28ecac1aab538d65f0867fd93e23d0d020c1a5d0f1Matt Turner#include "brw_program.h" 292faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3009733f220ac9921ce7d8c3524bc5327d8203c446Francisco Jerezusing namespace brw; 31337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quirogausing namespace brw::surface_access; 3209733f220ac9921ce7d8c3524bc5327d8203c446Francisco Jerez 33faa38e16aadd9f2a2416fcb5087d7f8fc8178bf2Kenneth Graunkevoid 34faa38e16aadd9f2a2416fcb5087d7f8fc8178bf2Kenneth Graunkefs_visitor::emit_nir_code() 35faa38e16aadd9f2a2416fcb5087d7f8fc8178bf2Kenneth Graunke{ 362faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott /* emit the arrays used for inputs and outputs - load/store intrinsics will 372faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott * be converted to reads/writes of these arrays 382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott */ 397926c3ea7d8f455cbee390d20c78dadf5432b9bcJason Ekstrand nir_setup_outputs(); 407926c3ea7d8f455cbee390d20c78dadf5432b9bcJason Ekstrand nir_setup_uniforms(); 
417926c3ea7d8f455cbee390d20c78dadf5432b9bcJason Ekstrand nir_emit_system_values(); 42cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 432faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott /* get the main function and emit it */ 449464d8c49813aba77285e7465b96e92a91ed327cJason Ekstrand nir_foreach_function(function, nir) { 45237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand assert(strcmp(function->name, "main") == 0); 46237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand assert(function->impl); 47237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand nir_emit_impl(function->impl); 482faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 492faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 502faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 512faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 527926c3ea7d8f455cbee390d20c78dadf5432b9bcJason Ekstrandfs_visitor::nir_setup_outputs() 532faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 54b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT) 557d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke return; 567d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 571e3c1b107e075b210998998423901092b8fcd79bJason Ekstrand nir_foreach_variable(var, &nir->outputs) { 58b63f7671a3eafa4ab293a13f45f58837bd840a46Kenneth Graunke const unsigned vec4s = 59b63f7671a3eafa4ab293a13f45f58837bd840a46Kenneth Graunke var->data.compact ? 
DIV_ROUND_UP(glsl_get_length(var->type), 4) 60b63f7671a3eafa4ab293a13f45f58837bd840a46Kenneth Graunke : type_size_vec4(var->type); 6159864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s); 6259864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke for (unsigned i = 0; i < vec4s; i++) { 6359864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke if (outputs[var->data.driver_location + i].file == BAD_FILE) 6459864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke outputs[var->data.driver_location + i] = offset(reg, bld, 4 * i); 6559864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke } 662faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 672faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 682faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 707926c3ea7d8f455cbee390d20c78dadf5432b9bcJason Ekstrandfs_visitor::nir_setup_uniforms() 712faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 7247e2a57fe955c04763c979ff4ca61c6867fa05bbJordan Justen if (dispatch_width != min_dispatch_width) 733e5a90792d14aeb599dd236f830e6e344b35c905Francisco Jerez return; 743e5a90792d14aeb599dd236f830e6e344b35c905Francisco Jerez 7518069dce4a4c3d71e6afc6b10bfa7bee0560ba9cJason Ekstrand uniforms = nir->num_uniforms / 4; 762faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 772faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 78cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrandstatic bool 797efff10585122d484dc3adab14af9380b9b8f309Connor Abbottemit_system_values_block(nir_block *block, fs_visitor *v) 80cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand{ 81cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand fs_reg *reg; 82cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 83707e72f13bb78869ee95d3286980bf1709cba6cfJason Ekstrand nir_foreach_instr(instr, block) { 84cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand if (instr->type != 
nir_instr_type_intrinsic) 85cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand continue; 86cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 87cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 88cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand switch (intrin->intrinsic) { 8934628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke case nir_intrinsic_load_vertex_id: 9034628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke unreachable("should be lowered by lower_vertex_id()."); 9134628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke 9234628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke case nir_intrinsic_load_vertex_id_zero_base: 9334628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke assert(v->stage == MESA_SHADER_VERTEX); 9434628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]; 9534628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke if (reg->file == BAD_FILE) 9634628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); 9734628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke break; 9834628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke 9934628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke case nir_intrinsic_load_base_vertex: 10034628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke assert(v->stage == MESA_SHADER_VERTEX); 10134628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX]; 10234628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke if (reg->file == BAD_FILE) 10334628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX); 10434628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke break; 10534628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke 10634628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke case 
nir_intrinsic_load_instance_id: 10734628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke assert(v->stage == MESA_SHADER_VERTEX); 10834628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID]; 10934628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke if (reg->file == BAD_FILE) 11034628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID); 11134628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke break; 11234628a838aa96643be02cd23eb55af50025dd422Kenneth Graunke 11317ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen case nir_intrinsic_load_base_instance: 11417ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen assert(v->stage == MESA_SHADER_VERTEX); 11517ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE]; 11617ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen if (reg->file == BAD_FILE) 11717ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE); 11817ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen break; 11917ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen 120cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen case nir_intrinsic_load_draw_id: 121cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen assert(v->stage == MESA_SHADER_VERTEX); 122cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID]; 123cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen if (reg->file == BAD_FILE) 124cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID); 125cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen break; 
126cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen 12748c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke case nir_intrinsic_load_invocation_id: 1287d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (v->stage == MESA_SHADER_TESS_CTRL) 1297d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 13048c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke assert(v->stage == MESA_SHADER_GEOMETRY); 13148c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; 13248c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke if (reg->file == BAD_FILE) { 13348c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL); 13448c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); 13548c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); 1363ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.SHR(iid, g1, brw_imm_ud(27u)); 13748c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke *reg = iid; 13848c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke } 13948c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke break; 14048c76eae8e52fba2fe22d2cfa7f3c94a5420feb2Kenneth Graunke 141cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand case nir_intrinsic_load_sample_pos: 142cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand assert(v->stage == MESA_SHADER_FRAGMENT); 143cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; 144cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand if (reg->file == BAD_FILE) 145cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand *reg = *v->emit_samplepos_setup(); 146cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand break; 147cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 148cb53aacaa1555b98fa77146492e96a7e3d7631baJason 
Ekstrand case nir_intrinsic_load_sample_id: 149cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand assert(v->stage == MESA_SHADER_FRAGMENT); 150cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_ID]; 151cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand if (reg->file == BAD_FILE) 152cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand *reg = *v->emit_sampleid_setup(); 153cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand break; 154cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 155cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand case nir_intrinsic_load_sample_mask_in: 156cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand assert(v->stage == MESA_SHADER_FRAGMENT); 15728e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand assert(v->devinfo->gen >= 7); 158cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN]; 159cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand if (reg->file == BAD_FILE) 160447d3eec6a869200612e5010f47335cb26789a3aKenneth Graunke *reg = *v->emit_samplemaskin_setup(); 161cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand break; 162cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 163c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen case nir_intrinsic_load_work_group_id: 164c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen assert(v->stage == MESA_SHADER_COMPUTE); 165c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID]; 166c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen if (reg->file == BAD_FILE) 167c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen *reg = *v->emit_cs_work_group_id_setup(); 168c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen break; 169c5743a5d7fa62a339222ceb96d568a525d77fe0cJordan Justen 170c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner case nir_intrinsic_load_helper_invocation: 
171c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner assert(v->stage == MESA_SHADER_FRAGMENT); 172c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner reg = &v->nir_system_values[SYSTEM_VALUE_HELPER_INVOCATION]; 173c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner if (reg->file == BAD_FILE) { 174c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner const fs_builder abld = 175c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner v->bld.annotate("gl_HelperInvocation", NULL); 176c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner 177c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner /* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the 178c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * pixel mask is in g1.7 of the thread payload. 179c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * 180c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * We move the per-channel pixel enable bit to the low bit of each 181c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * channel by shifting the byte containing the pixel mask by the 182c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * vector immediate 0x76543210UV. 183c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * 184c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * The region of <1,8,0> reads only 1 byte (the pixel masks for 185c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * subspans 0 and 1) in SIMD8 and an additional byte (the pixel 186c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * masks for 2 and 3) in SIMD16. 
187c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner */ 188c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner fs_reg shifted = abld.vgrf(BRW_REGISTER_TYPE_UW, 1); 189c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner abld.SHR(shifted, 190c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner stride(byte_offset(retype(brw_vec1_grf(1, 0), 191c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner BRW_REGISTER_TYPE_UB), 28), 192c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner 1, 8, 0), 193e58fabc93a25ccc910369f3638b302d46de12271Jordan Justen brw_imm_v(0x76543210)); 194c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner 195c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner /* A set bit in the pixel mask means the channel is enabled, but 196c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * that is the opposite of gl_HelperInvocation so we need to invert 197c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * the mask. 198c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * 199c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * The negate source-modifier bit of logical instructions on Gen8+ 200c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * performs 1's complement negation, so we can use that instead of 201c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * a NOT instruction. 
202c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner */ 203c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner fs_reg inverted = negate(shifted); 204c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner if (v->devinfo->gen < 8) { 205c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner inverted = abld.vgrf(BRW_REGISTER_TYPE_UW); 206c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner abld.NOT(inverted, shifted); 207c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner } 208c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner 209c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner /* We then resolve the 0/1 result to 0/~0 boolean values by ANDing 210c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner * with 1 and negating. 211c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner */ 212c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner fs_reg anded = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); 213c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner abld.AND(anded, inverted, brw_imm_uw(1)); 214c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner 215c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner fs_reg dst = abld.vgrf(BRW_REGISTER_TYPE_D, 1); 216c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner abld.MOV(dst, negate(retype(anded, BRW_REGISTER_TYPE_D))); 217c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner *reg = dst; 218c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner } 219c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner break; 220c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner 221cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand default: 222cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand break; 223cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand } 224cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand } 225cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 226cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand return true; 227cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand} 228cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 
229cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrandvoid 2307926c3ea7d8f455cbee390d20c78dadf5432b9bcJason Ekstrandfs_visitor::nir_emit_system_values() 231cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand{ 232cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX); 233dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++) { 234dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner nir_system_values[i] = fs_reg(); 235dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner } 236dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner 2379464d8c49813aba77285e7465b96e92a91ed327cJason Ekstrand nir_foreach_function(function, nir) { 238237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand assert(strcmp(function->name, "main") == 0); 239237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand assert(function->impl); 2407efff10585122d484dc3adab14af9380b9b8f309Connor Abbott nir_foreach_block(block, function->impl) { 2417efff10585122d484dc3adab14af9380b9b8f309Connor Abbott emit_system_values_block(block, this); 2427efff10585122d484dc3adab14af9380b9b8f309Connor Abbott } 243cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand } 244cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand} 245cb53aacaa1555b98fa77146492e96a7e3d7631baJason Ekstrand 2462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 247e0aa4c6272851ed418dfa18ee6014f40b0e266c2Jason Ekstrandfs_visitor::nir_emit_impl(nir_function_impl *impl) 2482faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 249dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner nir_locals = ralloc_array(mem_ctx, fs_reg, impl->reg_alloc); 250dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner for (unsigned i = 0; i < impl->reg_alloc; i++) { 251dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner nir_locals[i] = fs_reg(); 252dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner } 253dba309fc14d1ca99251c8f8115d2a26ac86f14f6Matt Turner 
254e0aa4c6272851ed418dfa18ee6014f40b0e266c2Jason Ekstrand foreach_list_typed(nir_register, reg, node, &impl->registers) { 2552faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unsigned array_elems = 256e0aa4c6272851ed418dfa18ee6014f40b0e266c2Jason Ekstrand reg->num_array_elems == 0 ? 1 : reg->num_array_elems; 257e0aa4c6272851ed418dfa18ee6014f40b0e266c2Jason Ekstrand unsigned size = array_elems * reg->num_components; 2580f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott const brw_reg_type reg_type = 2590f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott reg->bit_size == 32 ? BRW_REGISTER_TYPE_F : BRW_REGISTER_TYPE_DF; 2600f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott nir_locals[reg->index] = bld.vgrf(reg_type, size); 2612faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 2622faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 263864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg, 264864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott impl->ssa_alloc); 265864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott 2662faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_cf_list(&impl->body); 2672faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 2682faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 2692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 2702faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_visitor::nir_emit_cf_list(exec_list *list) 2712faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 2728776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand exec_list_validate(list); 2732faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott foreach_list_typed(nir_cf_node, node, node, list) { 2742faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott switch (node->type) { 2752faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_cf_node_if: 2762faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_if(nir_cf_node_as_if(node)); 
2772faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 2782faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 2792faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_cf_node_loop: 2802faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_loop(nir_cf_node_as_loop(node)); 2812faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 2822faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 2832faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_cf_node_block: 2842faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_block(nir_cf_node_as_block(node)); 2852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 2862faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 2872faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 2882faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("Invalid CFG node block"); 2892faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 2902faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 2912faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 2922faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 2932faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 2942faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_visitor::nir_emit_if(nir_if *if_stmt) 2952faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 2962faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott /* first, put the condition into f0 */ 2973632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez fs_inst *inst = bld.MOV(bld.null_reg_d(), 2982faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott retype(get_nir_src(if_stmt->condition), 2993632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez BRW_REGISTER_TYPE_D)); 3002faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->conditional_mod = BRW_CONDITIONAL_NZ; 3012faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3023632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.IF(BRW_PREDICATE_NORMAL); 
3032faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3042faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_cf_list(&if_stmt->then_list); 3052faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3062faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott /* note: if the else is empty, dead CF elimination will remove it */ 3073632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.emit(BRW_OPCODE_ELSE); 3082faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3092faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_cf_list(&if_stmt->else_list); 3102faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3113632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.emit(BRW_OPCODE_ENDIF); 3122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 3132faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3142faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 3152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_visitor::nir_emit_loop(nir_loop *loop) 3162faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 3173632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.emit(BRW_OPCODE_DO); 3182faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3192faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_cf_list(&loop->body); 3202faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3213632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.emit(BRW_OPCODE_WHILE); 3222faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 3232faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3242faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 3252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_visitor::nir_emit_block(nir_block *block) 3262faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 327707e72f13bb78869ee95d3286980bf1709cba6cfJason Ekstrand nir_foreach_instr(instr, block) { 3282faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott nir_emit_instr(instr); 3292faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 
3302faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 3312faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3322faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 3332faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_visitor::nir_emit_instr(nir_instr *instr) 3342faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 3353632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez const fs_builder abld = bld.annotate(NULL, instr); 336b6354d9bb077815d2e388dc5d0e7411ea6d89748Kenneth Graunke 3372faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott switch (instr->type) { 3382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_instr_type_alu: 339fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez nir_emit_alu(abld, nir_instr_as_alu(instr)); 3402faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 3412faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3422faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_instr_type_intrinsic: 343918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke switch (stage) { 344918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case MESA_SHADER_VERTEX: 345918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr)); 346918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3477d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case MESA_SHADER_TESS_CTRL: 3487d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke nir_emit_tcs_intrinsic(abld, nir_instr_as_intrinsic(instr)); 3497d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 350a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke case MESA_SHADER_TESS_EVAL: 351a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr)); 352a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke break; 353918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case MESA_SHADER_GEOMETRY: 
354918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr)); 355918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 356918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case MESA_SHADER_FRAGMENT: 357918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr)); 358918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 359918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case MESA_SHADER_COMPUTE: 360918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr)); 361918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 362918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke default: 363918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke unreachable("unsupported shader stage"); 364918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3652faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 3662faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 367cd4b995254fe29bae9ab5a9563cc615274d361edJason Ekstrand case nir_instr_type_tex: 368bf83a1a219af8bf82c3c721888bbe0dfc3eced34Francisco Jerez nir_emit_texture(abld, nir_instr_as_tex(instr)); 3692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 3702faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3712faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_instr_type_load_const: 3720ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott nir_emit_load_const(abld, nir_instr_as_load_const(instr)); 3732faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 3742faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 375864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott case nir_instr_type_ssa_undef: 37612a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke /* We create a new VGRF for undefs on every use (by handling 37712a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke * them in 
get_nir_src()), rather than for each definition. 37812a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke * This helps register coalescing eliminate MOVs from undef. 37912a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke */ 380864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott break; 381864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott 3822faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_instr_type_jump: 3833632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez nir_emit_jump(abld, nir_instr_as_jump(instr)); 3842faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 3852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3862faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 3872faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("unknown instruction type"); 3882faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 3892faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 3902faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3911f862e923cba1d5cd54a707f70f0be113635e855Matt Turner/** 3921f862e923cba1d5cd54a707f70f0be113635e855Matt Turner * Recognizes a parent instruction of nir_op_extract_* and changes the type to 3931f862e923cba1d5cd54a707f70f0be113635e855Matt Turner * match instr. 
3941f862e923cba1d5cd54a707f70f0be113635e855Matt Turner */ 3951f862e923cba1d5cd54a707f70f0be113635e855Matt Turnerbool 3961f862e923cba1d5cd54a707f70f0be113635e855Matt Turnerfs_visitor::optimize_extract_to_float(nir_alu_instr *instr, 3971f862e923cba1d5cd54a707f70f0be113635e855Matt Turner const fs_reg &result) 3981f862e923cba1d5cd54a707f70f0be113635e855Matt Turner{ 3991f862e923cba1d5cd54a707f70f0be113635e855Matt Turner if (!instr->src[0].src.is_ssa || 4001f862e923cba1d5cd54a707f70f0be113635e855Matt Turner !instr->src[0].src.ssa->parent_instr) 4011f862e923cba1d5cd54a707f70f0be113635e855Matt Turner return false; 4021f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4031f862e923cba1d5cd54a707f70f0be113635e855Matt Turner if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu) 4041f862e923cba1d5cd54a707f70f0be113635e855Matt Turner return false; 4051f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4061f862e923cba1d5cd54a707f70f0be113635e855Matt Turner nir_alu_instr *src0 = 4071f862e923cba1d5cd54a707f70f0be113635e855Matt Turner nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); 4081f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4091f862e923cba1d5cd54a707f70f0be113635e855Matt Turner if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 && 4101f862e923cba1d5cd54a707f70f0be113635e855Matt Turner src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16) 4111f862e923cba1d5cd54a707f70f0be113635e855Matt Turner return false; 4121f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4131f862e923cba1d5cd54a707f70f0be113635e855Matt Turner nir_const_value *element = nir_src_as_const_value(src0->src[1].src); 4141f862e923cba1d5cd54a707f70f0be113635e855Matt Turner assert(element != NULL); 4151f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 41629ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez /* Element type to extract.*/ 41729ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez const brw_reg_type type = brw_int_type( 
41829ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16 ? 2 : 1, 41929ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez src0->op == nir_op_extract_i16 || src0->op == nir_op_extract_i8); 4201f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4211f862e923cba1d5cd54a707f70f0be113635e855Matt Turner fs_reg op0 = get_nir_src(src0->src[0].src); 422d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott op0.type = brw_type_for_nir_type( 423d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott (nir_alu_type)(nir_op_infos[src0->op].input_types[0] | 424d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott nir_src_bit_size(src0->src[0].src))); 4251f862e923cba1d5cd54a707f70f0be113635e855Matt Turner op0 = offset(op0, bld, src0->src[0].swizzle[0]); 4261f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4271f862e923cba1d5cd54a707f70f0be113635e855Matt Turner set_saturate(instr->dest.saturate, 42829ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez bld.MOV(result, subscript(op0, type, element->u32[0]))); 4291f862e923cba1d5cd54a707f70f0be113635e855Matt Turner return true; 4301f862e923cba1d5cd54a707f70f0be113635e855Matt Turner} 4311f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 4327a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turnerbool 4337a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turnerfs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, 4347a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner const fs_reg &result) 4357a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner{ 436864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott if (!instr->src[0].src.is_ssa || 437864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic) 4387a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner return false; 4397a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4407a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner nir_intrinsic_instr *src0 = 
441864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr); 4427a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4437a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner if (src0->intrinsic != nir_intrinsic_load_front_face) 4447a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner return false; 4457a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4467a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); 447084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga if (!value1 || fabsf(value1->f32[0]) != 1.0f) 4487a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner return false; 4497a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4507a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src); 451084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga if (!value2 || fabsf(value2->f32[0]) != 1.0f) 4527a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner return false; 4537a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4547a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner fs_reg tmp = vgrf(glsl_type::int_type); 4557a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 45628e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 6) { 4577a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner /* Bit 15 of g0.0 is 0 if the polygon is front facing. */ 4587a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); 4597a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4607a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner /* For (gl_FrontFacing ? 
1.0 : -1.0), emit: 4617a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * 4627a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W 4637a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D 4647a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * 4657a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0). 4667a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * 4677a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * This negation looks like it's safe in practice, because bits 0:4 will 4687a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * surely be TRIANGLES 4697a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner */ 4707a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 471084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga if (value1->f32[0] == -1.0f) { 4727a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner g0.negate = true; 4737a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner } 4747a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 47562aaef6c83e4eb354bd7f15803db01e90d22fc34Francisco Jerez bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1), 47662aaef6c83e4eb354bd7f15803db01e90d22fc34Francisco Jerez g0, brw_imm_uw(0x3f80)); 4777a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner } else { 4787a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner /* Bit 31 of g1.6 is 0 if the polygon is front facing. */ 4797a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); 4807a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4817a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner /* For (gl_FrontFacing ? 
1.0 : -1.0), emit: 4827a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * 4837a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * or(8) tmp<1>D g1.6<0,1,0>D 0x3f800000D 4847a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D 4857a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * 4867a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0). 4877a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * 4887a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * This negation looks like it's safe in practice, because bits 0:4 will 4897a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner * surely be TRIANGLES 4907a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner */ 4917a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 492084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga if (value1->f32[0] == -1.0f) { 4937a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner g1_6.negate = true; 4947a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner } 4957a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 4963ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.OR(tmp, g1_6, brw_imm_d(0x3f800000)); 4977a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner } 4983ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000)); 4997a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 5007a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner return true; 5017a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner} 5027a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 503de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanickstatic void 504de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanickemit_find_msb_using_lzd(const fs_builder &bld, 505de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick const fs_reg &result, 506de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick const fs_reg &src, 507de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick bool 
is_signed) 508de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick{ 509de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick fs_inst *inst; 510c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick fs_reg temp = src; 511de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick 512c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick if (is_signed) { 513c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick /* LZD of an absolute value source almost always does the right 514c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * thing. There are two problem values: 515c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 516c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns 517c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 0. However, findMSB(int(0x80000000)) == 30. 518c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 519c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns 520c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 521c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 522c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * For a value of zero or negative one, -1 will be returned. 523c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 524c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * * Negative powers of two. LZD(abs(-(1<<x))) returns x, but 525c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * findMSB(-(1<<x)) should return x-1. 526c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 527c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * For all negative number cases, including 0x80000000 and 528c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * 0xffffffff, the correct value is obtained from LZD if instead of 529c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * negating the (already negative) value the logical-not is used. 
A 530c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * conditonal logical-not can be achieved in two instructions. 531c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick */ 532c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick temp = bld.vgrf(BRW_REGISTER_TYPE_D); 533c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick 534c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick bld.ASR(temp, src, brw_imm_d(31)); 535c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick bld.XOR(temp, temp, src); 536c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick } 537c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick 538c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), 539c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick retype(temp, BRW_REGISTER_TYPE_UD)); 540de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick 541de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick /* LZD counts from the MSB side, while GLSL's findMSB() wants the count 542de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick * from the LSB side. Subtract the result from 31 to convert the MSB 543de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick * count into an LSB count. If no bits are set, LZD will return 32. 544de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick * 31-32 = -1, which is exactly what findMSB() is supposed to return. 
545de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick */ 546de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31)); 547de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick inst->src[0].negate = true; 548de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick} 549de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick 5502faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 551fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerezfs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) 5522faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 5532faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; 554112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand fs_inst *inst; 5552faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 556dfb3abbaecfbe30b8858a5428c604f9d90f65505Jason Ekstrand fs_reg result = get_nir_dest(instr->dest.dest); 557d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott result.type = brw_type_for_nir_type( 558d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott (nir_alu_type)(nir_op_infos[instr->op].output_type | 559d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott nir_dest_bit_size(instr->dest.dest))); 5602faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 5618776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand fs_reg op[4]; 5628776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { 5638776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand op[i] = get_nir_src(instr->src[i].src); 564d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott op[i].type = brw_type_for_nir_type( 565d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott (nir_alu_type)(nir_op_infos[instr->op].input_types[i] | 566d17cdacba37cff8ee172322c9ba2c4a58bf57d8bConnor Abbott nir_src_bit_size(instr->src[i].src))); 
5678776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand op[i].abs = instr->src[i].abs; 5688776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand op[i].negate = instr->src[i].negate; 5698776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 5708776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand 5718776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand /* We get a bunch of mov's out of the from_ssa pass and they may still 5728776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * be vectorized. We'll handle them as a special-case. We'll also 5738776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * handle vecN here because it's basically the same thing. 5748776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand */ 5758776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand switch (instr->op) { 5768776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand case nir_op_imov: 5778776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand case nir_op_fmov: 5788776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand case nir_op_vec2: 5798776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand case nir_op_vec3: 5808776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand case nir_op_vec4: { 5818776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand fs_reg temp = result; 5828776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand bool need_extra_copy = false; 5838776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { 5848776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand if (!instr->src[i].src.is_ssa && 5858776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) { 5868776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand need_extra_copy = true; 587fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez temp = bld.vgrf(result.type, 4); 5888776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand break; 5898776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 
5908776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 5918776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand 5928776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand for (unsigned i = 0; i < 4; i++) { 5938776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand if (!(instr->dest.write_mask & (1 << i))) 5948776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand continue; 5958776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand 5968776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand if (instr->op == nir_op_imov || instr->op == nir_op_fmov) { 597f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand inst = bld.MOV(offset(temp, bld, i), 598f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand offset(op[0], bld, instr->src[0].swizzle[i])); 5998776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } else { 600f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand inst = bld.MOV(offset(temp, bld, i), 601f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand offset(op[i], bld, instr->src[i].swizzle[0])); 6028776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 6038776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand inst->saturate = instr->dest.saturate; 6048776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 6058776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand 6068776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand /* In this case the source and destination registers were the same, 6078776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * so we need to insert an extra set of moves in order to deal with 6088776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * any swizzling. 
6098776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand */ 6108776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand if (need_extra_copy) { 6118776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand for (unsigned i = 0; i < 4; i++) { 6128776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand if (!(instr->dest.write_mask & (1 << i))) 6138776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand continue; 6148776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand 615f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand bld.MOV(offset(result, bld, i), offset(temp, bld, i)); 6168776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 6178776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 6188776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand return; 6198776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 6208776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand default: 6218776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand break; 6228776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 6232faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 6248776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand /* At this point, we have dealt with any instruction that operates on 6258776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * more than a single channel. Therefore, we can just adjust the source 6268776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * and destination registers for that channel and emit the instruction. 6278776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand */ 6288776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand unsigned channel = 0; 629112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand if (nir_op_infos[instr->op].output_size == 0) { 6308776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand /* Since NIR is doing the scalarizing for us, we should only ever see 6318776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand * vectorized operations with a single channel. 
632112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand */ 633112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand assert(_mesa_bitcount(instr->dest.write_mask) == 1); 6348776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand channel = ffs(instr->dest.write_mask) - 1; 6358776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand 636f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand result = offset(result, bld, channel); 6378776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand } 638112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand 6398776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { 6408776b1b14b229d110f283f5da8c3c36261068edeJason Ekstrand assert(nir_op_infos[instr->op].input_sizes[i] < 2); 641f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand op[i] = offset(op[i], bld, instr->src[i].swizzle[channel]); 642112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand } 643112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand 6442faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott switch (instr->op) { 6452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_i2f: 646112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_u2f: 6471f862e923cba1d5cd54a707f70f0be113635e855Matt Turner if (optimize_extract_to_float(instr, result)) 6481f862e923cba1d5cd54a707f70f0be113635e855Matt Turner return; 6490177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga inst = bld.MOV(result, op[0]); 6500177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga inst->saturate = instr->dest.saturate; 6510177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga break; 6521f862e923cba1d5cd54a707f70f0be113635e855Matt Turner 653a644b0939dd8284bca25042bccd2439c173dd7d7Connor Abbott case nir_op_f2d: 654efaf62a40a95b240cab7b0f371c7178aa19b7f3aIago Toral Quiroga case nir_op_i2d: 655efaf62a40a95b240cab7b0f371c7178aa19b7f3aIago Toral Quiroga case nir_op_u2d: 
6560177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: 6570177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * 6580177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * "When source or destination is 64b (...), regioning in Align1 6590177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * must follow these rules: 6600177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * 6610177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * 1. Source and destination horizontal stride must be aligned to 6620177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * the same qword. 6630177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * (...)" 6640177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * 6650177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * This means that 32-bit to 64-bit conversions need to have the 32-bit 6660177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * data elements aligned to 64-bit. This restriction does not apply to 6670177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga * BDW and later. 
6680177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga */ 6690177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga if (devinfo->is_cherryview || devinfo->is_broxton) { 6700177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga fs_reg tmp = bld.vgrf(result.type, 1); 6710177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga tmp = subscript(tmp, op[0].type, 0); 6720177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga inst = bld.MOV(tmp, op[0]); 6730177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga inst = bld.MOV(result, tmp); 6740177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga inst->saturate = instr->dest.saturate; 6750177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga break; 6760177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga } 6770177dbb6c2fe876a9761a4a97eec44accfa4c007Iago Toral Quiroga /* fallthrough */ 678a644b0939dd8284bca25042bccd2439c173dd7d7Connor Abbott case nir_op_d2f: 679c63a6f21494685d41d51887901298639c4d32c22Iago Toral Quiroga case nir_op_d2i: 680c63a6f21494685d41d51887901298639c4d32c22Iago Toral Quiroga case nir_op_d2u: 681fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.MOV(result, op[0]); 6822faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 6832faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 6842faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 6852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_f2i: 6862faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_f2u: 687fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.MOV(result, op[0]); 6882faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 6892faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 6902faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fsign: { 69180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga if (type_sz(op[0].type) < 8) { 69280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 
/* AND(val, 0x80000000) gives the sign bit. 69380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * 69480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not 69580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * zero. 69680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga */ 69780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ); 69880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 69980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); 70080f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga op[0].type = BRW_REGISTER_TYPE_UD; 70180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga result.type = BRW_REGISTER_TYPE_UD; 70280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga bld.AND(result_int, op[0], brw_imm_ud(0x80000000u)); 70380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 70480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u)); 70580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst->predicate = BRW_PREDICATE_NORMAL; 70680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga if (instr->dest.saturate) { 70780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst = bld.MOV(result, result); 70880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst->saturate = true; 70980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga } 71080f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga } else { 71180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* For doubles we do the same but we need to consider: 71280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * 71380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * - 2-src instructions can't operate with 64-bit immediates 
71480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * - The sign is encoded in the high 32-bit of each DF 71580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * - CMP with DF requires special handling in SIMD16 71680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * - We need to produce a DF result. 71780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga */ 71880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 71980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* 2-src instructions can't have 64-bit immediates, so put 0.0 in 72080f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * a register and compare with that. 72180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga */ 72280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga fs_reg tmp = vgrf(glsl_type::double_type); 72387a13f598b1ecd50bc209088cf1dc60fd90df015Iago Toral Quiroga bld.MOV(tmp, setup_imm_df(bld, 0.0)); 72480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 72580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* A direct DF CMP using the flag register (null dst) won't work in 72680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * SIMD16 because the CMP will be split in two by lower_simd_width, 72780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * resulting in two CMP instructions with the same dst (NULL), 72880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * leading to dead code elimination of the first one. In SIMD8, 72980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * however, there is no need to split the CMP and we can save some 73080f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * work. 
73180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga */ 73280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga fs_reg dst_tmp = vgrf(glsl_type::double_type); 73380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga bld.CMP(dst_tmp, op[0], tmp, BRW_CONDITIONAL_NZ); 73480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 73580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* In SIMD16 we want to avoid using a NULL dst register with DF CMP, 73680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * so we store the result of the comparison in a vgrf instead and 73780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * then we generate a UD comparison from that that won't have to 73880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * be split by lower_simd_width. This is what NIR does to handle 73980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga * double comparisons in the general case. 74080f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga */ 74180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga if (bld.dispatch_width() == 16 ) { 74280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga fs_reg dst_tmp_ud = retype(dst_tmp, BRW_REGISTER_TYPE_UD); 74380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga bld.MOV(dst_tmp_ud, subscript(dst_tmp, BRW_REGISTER_TYPE_UD, 0)); 74480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga bld.CMP(bld.null_reg_ud(), 74580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga dst_tmp_ud, brw_imm_ud(0), BRW_CONDITIONAL_NZ); 74680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga } 74780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 74880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* Get the high 32-bit of each double component where the sign is */ 74980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); 75080f60a4302c8bd805882baaf60db72cf785593e3Iago 
Toral Quiroga bld.MOV(result_int, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); 75180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 75280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* Get the sign bit */ 75380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga bld.AND(result_int, result_int, brw_imm_ud(0x80000000u)); 75480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 75580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* Add 1.0 to the sign, predicated to skip the case of op[0] == 0.0 */ 75680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u)); 75780f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst->predicate = BRW_PREDICATE_NORMAL; 75880f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 75980f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga /* Convert from 32-bit float to 64-bit double */ 76080f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga result.type = BRW_REGISTER_TYPE_DF; 76180f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst = bld.MOV(result, retype(result_int, BRW_REGISTER_TYPE_F)); 76280f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga 76380f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga if (instr->dest.saturate) { 76480f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst = bld.MOV(result, result); 76580f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga inst->saturate = true; 76680f60a4302c8bd805882baaf60db72cf785593e3Iago Toral Quiroga } 7672faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 7682faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 7692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 7702faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 771112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_isign: 7722faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). 
773112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand * -> non-negative val generates 0x00000000. 774112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand * Predicated OR sets 1 if val is positive. 775112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand */ 776e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 7773ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G); 7783ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.ASR(result, op[0], brw_imm_d(31)); 7793ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner inst = bld.OR(result, result, brw_imm_d(1)); 7802faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->predicate = BRW_PREDICATE_NORMAL; 7812faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 7822faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 7832faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_frcp: 784fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]); 785112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 7862faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 7872faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 7882faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fexp2: 789fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(SHADER_OPCODE_EXP2, result, op[0]); 790112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 7912faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 7922faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 7932faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_flog2: 794fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(SHADER_OPCODE_LOG2, result, op[0]); 795112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = 
instr->dest.saturate; 7962faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 7972faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 7982faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fsin: 799bfd17c76c1267756ea16051cbe174cb23ff49f44Kenneth Graunke inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); 800112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 8012faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8022faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 8032faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fcos: 804bfd17c76c1267756ea16051cbe174cb23ff49f44Kenneth Graunke inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); 805112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 8062faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8072faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 8082faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fddx: 809112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand if (fs_key->high_quality_derivatives) { 810fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]); 811112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand } else { 812fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]); 813112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand } 814112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 8152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 81695fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand case nir_op_fddx_fine: 817fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]); 818112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 81995fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand break; 
82095fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand case nir_op_fddx_coarse: 821fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]); 822112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 82395fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand break; 8242faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fddy: 825112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand if (fs_key->high_quality_derivatives) { 826dac10e8a1390711f1f36f224644c4a33586cebe3Kenneth Graunke inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]); 827112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand } else { 828dac10e8a1390711f1f36f224644c4a33586cebe3Kenneth Graunke inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]); 829112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand } 830112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 8312faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 83295fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand case nir_op_fddy_fine: 833dac10e8a1390711f1f36f224644c4a33586cebe3Kenneth Graunke inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]); 834112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 83595fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand break; 83695fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand case nir_op_fddy_coarse: 837dac10e8a1390711f1f36f224644c4a33586cebe3Kenneth Graunke inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]); 838112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 83995fbd6e1eed58f1f87aaa425bb5312a92db29d21Jason Ekstrand break; 8402faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 841112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_iadd: 842e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 
843e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga case nir_op_fadd: 844fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.ADD(result, op[0], op[1]); 8452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 8462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8472faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 848112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_fmul: 849fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.MUL(result, op[0], op[1]); 8502faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 8512faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8522faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 8531e4e17fbd9296cc5064aabdb351a894d10190cb6Matt Turner case nir_op_imul: 854e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 855fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.MUL(result, op[0], op[1]); 8562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8572faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 8582faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_imul_high: 859e77a4a9b1f66de383043df95aada40fd5a004913Francisco Jerez case nir_op_umul_high: 860e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 861e77a4a9b1f66de383043df95aada40fd5a004913Francisco Jerez bld.emit(SHADER_OPCODE_MULH, result, op[0], op[1]); 8622faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8632faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 8642faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_idiv: 8652faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_udiv: 866e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 
867fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]); 8682faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 870b00cd6e4a0f9a84d514f428428be348900236e2eFrancisco Jerez case nir_op_uadd_carry: 871b00cd6e4a0f9a84d514f428428be348900236e2eFrancisco Jerez unreachable("Should have been lowered by carry_to_arith()."); 8722faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 873b00cd6e4a0f9a84d514f428428be348900236e2eFrancisco Jerez case nir_op_usub_borrow: 874b00cd6e4a0f9a84d514f428428be348900236e2eFrancisco Jerez unreachable("Should have been lowered by borrow_to_arith()."); 8752faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 8762faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_umod: 877765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand case nir_op_irem: 878765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand /* According to the sign table for INT DIV in the Ivy Bridge PRM, it 879765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * appears that our hardware just does the right thing for signed 880765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * remainder. 881765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand */ 882e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 883fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); 8842faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 8852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 886765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand case nir_op_imod: { 887765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand /* Get a regular C-style remainder. If a % b == 0, set the predicate. 
*/ 888765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); 889765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand 890765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand /* Math instructions don't support conditional mod */ 891765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst = bld.MOV(bld.null_reg_d(), result); 892765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst->conditional_mod = BRW_CONDITIONAL_NZ; 893765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand 894765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand /* Now, we need to determine if signs of the sources are different. 895765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * When we XOR the sources, the top bit is 0 if they are the same and 1 896765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * if they are different. We can then use a conditional modifier to 897765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * turn that into a predicate. This leads us to an XOR.l instruction. 898765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * 899765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * Technically, according to the PRM, you're not allowed to use .l on a 900765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * XOR instruction. However, empirical experiments and Curro's reading 901765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * of the simulator source both indicate that it's safe. 
902765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand */ 903765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D); 904765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst = bld.XOR(tmp, op[0], op[1]); 905765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst->predicate = BRW_PREDICATE_NORMAL; 906765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst->conditional_mod = BRW_CONDITIONAL_L; 907765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand 908765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand /* If the result of the initial remainder operation is non-zero and the 909765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * two sources have different signs, add in a copy of op[1] to get the 910765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand * final integer modulus value. 911765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand */ 912765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst = bld.ADD(result, result, op[1]); 913765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand inst->predicate = BRW_PREDICATE_NORMAL; 914765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand break; 915765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand } 916765dd6534937e125b95c7998862b1a4ec76a22d8Jason Ekstrand 9172faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_flt: 918e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_fge: 919e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_feq: 920e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_fne: { 921e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott fs_reg dest = result; 922e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott if (nir_src_bit_size(instr->src[0].src) > 32) { 923e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott dest = bld.vgrf(BRW_REGISTER_TYPE_DF, 1); 924e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott } 925e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott 
brw_conditional_mod cond; 926e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott switch (instr->op) { 927e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_flt: 928e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott cond = BRW_CONDITIONAL_L; 929e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott break; 930e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_fge: 931e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott cond = BRW_CONDITIONAL_GE; 932e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott break; 933e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_feq: 934e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott cond = BRW_CONDITIONAL_Z; 935e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott break; 936e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott case nir_op_fne: 937e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott cond = BRW_CONDITIONAL_NZ; 938e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott break; 939e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott default: 940e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott unreachable("bad opcode"); 941e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott } 942e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott bld.CMP(dest, op[0], op[1], cond); 943e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott if (nir_src_bit_size(instr->src[0].src) > 32) { 944e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0)); 945e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott } 946e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott break; 947e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott } 948e83f51d54e9c3db11526b66a741352135eae6f52Connor Abbott 9492faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ilt: 9502faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ult: 951e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga 
assert(nir_dest_bit_size(instr->dest.dest) < 64); 952fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_L); 9532faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9542faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 9552faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ige: 9562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_uge: 957e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 958fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_GE); 9592faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9602faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 9612faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ieq: 962e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 963fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_Z); 9642faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9652faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 9662faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ine: 967e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 968fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ); 9692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9702faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 9712faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_inot: 972e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 97328e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 8) { 9748a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand op[0] = resolve_source_modifiers(op[0]); 
975a84f66a9b6cf46bb19ca71faca5b1d6d81209cafKenneth Graunke } 976fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.NOT(result, op[0]); 9772faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9782faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ixor: 979e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 98028e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 8) { 9818a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand op[0] = resolve_source_modifiers(op[0]); 9828a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand op[1] = resolve_source_modifiers(op[1]); 983a84f66a9b6cf46bb19ca71faca5b1d6d81209cafKenneth Graunke } 984fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.XOR(result, op[0], op[1]); 9852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9862faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ior: 987e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 98828e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 8) { 9898a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand op[0] = resolve_source_modifiers(op[0]); 9908a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand op[1] = resolve_source_modifiers(op[1]); 991a84f66a9b6cf46bb19ca71faca5b1d6d81209cafKenneth Graunke } 992fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.OR(result, op[0], op[1]); 9932faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 9942faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_iand: 995e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 99628e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 8) { 9978a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand op[0] = resolve_source_modifiers(op[0]); 9988a688bee83ced46eb4bff741f05d2da033c07adeJason Ekstrand 
op[1] = resolve_source_modifiers(op[1]); 999a84f66a9b6cf46bb19ca71faca5b1d6d81209cafKenneth Graunke } 1000fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.AND(result, op[0], op[1]); 10012faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 10022faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10032faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fdot2: 10042faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fdot3: 1005112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_fdot4: 1006112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ball_fequal2: 1007112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ball_iequal2: 1008112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ball_fequal3: 1009112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ball_iequal3: 1010112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ball_fequal4: 1011112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ball_iequal4: 1012112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_bany_fnequal2: 1013112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_bany_inequal2: 1014112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_bany_fnequal3: 1015112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_bany_inequal3: 1016112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_bany_fnequal4: 1017112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_bany_inequal4: 1018112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand unreachable("Lowered by nir_lower_alu_reductions"); 10192faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10202faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise1_1: 10212faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise1_2: 10222faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise1_3: 
10232faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise1_4: 10242faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise2_1: 10252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise2_2: 10262faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise2_3: 10272faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise2_4: 10282faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise3_1: 10292faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise3_2: 10302faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise3_3: 10312faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise3_4: 10322faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise4_1: 10332faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise4_2: 10342faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise4_3: 10352faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fnoise4_4: 10362faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("not reached: should be handled by lower_noise"); 10372faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ldexp: 10392faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("not reached: should be handled by ldexp_to_arith()"); 10402faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10412faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fsqrt: 1042fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(SHADER_OPCODE_SQRT, result, op[0]); 1043112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 10442faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 10452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_frsq: 
1047fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]); 1048112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 10492faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 10502faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10512faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_b2i: 1052112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_b2f: 10533ee2daf23dc91b8dfc017b5c89c10ab1376ba4dfFrancisco Jerez bld.MOV(result, negate(op[0])); 10542faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 10552faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 10562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_f2b: 10573ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ); 10582faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 1059e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga case nir_op_d2b: { 1060e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga /* two-argument instructions can't take 64-bit immediates */ 1061e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga fs_reg zero = vgrf(glsl_type::double_type); 106287a13f598b1ecd50bc209088cf1dc60fd90df015Iago Toral Quiroga bld.MOV(zero, setup_imm_df(bld, 0.0)); 1063e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga /* A SIMD16 execution needs to be split in two instructions, so use 1064e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga * a vgrf instead of the flag register as dst so instruction splitting 1065e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga * works 1066e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga */ 1067e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga fs_reg tmp = vgrf(glsl_type::double_type); 1068e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ); 
1069e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0)); 1070e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga break; 1071e0c45182e3d865d7f187dc35e70832f1fa7c9fadIago Toral Quiroga } 10722faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_i2b: 10733ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); 10742faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 10752faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 1076112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ftrunc: 1077fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.RNDZ(result, op[0]); 10782faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 10792faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 1080112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand 10812faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fceil: { 10822faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott op[0].negate = !op[0].negate; 1083112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand fs_reg temp = vgrf(glsl_type::float_type); 1084fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.RNDD(temp, op[0]); 10852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott temp.negate = true; 1086fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.MOV(result, temp); 10872faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 10882faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 10892faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 1090112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ffloor: 1091fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.RNDD(result, op[0]); 10922faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 
10932faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 1094112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_ffract: 1095fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.FRC(result, op[0]); 10962faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 10972faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 1098112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand case nir_op_fround_even: 1099fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.RNDE(result, op[0]); 11002faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott inst->saturate = instr->dest.saturate; 11012faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 11022faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 110314c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand case nir_op_fquantize2f16: { 110414c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand fs_reg tmp16 = bld.vgrf(BRW_REGISTER_TYPE_D); 110514c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand fs_reg tmp32 = bld.vgrf(BRW_REGISTER_TYPE_F); 110614c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F); 110714c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand 110814c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand /* The destination stride must be at least as big as the source stride. 
*/ 110914c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand tmp16.type = BRW_REGISTER_TYPE_W; 111014c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand tmp16.stride = 2; 111114c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand 111214c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand /* Check for denormal */ 111314c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand fs_reg abs_src0 = op[0]; 111414c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand abs_src0.abs = true; 111514c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), 111614c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand BRW_CONDITIONAL_L); 111714c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand /* Get the appropriately signed zero */ 111814c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand bld.AND(retype(zero, BRW_REGISTER_TYPE_UD), 111914c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand retype(op[0], BRW_REGISTER_TYPE_UD), 112014c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand brw_imm_ud(0x80000000)); 112114c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand /* Do the actual F32 -> F16 -> F32 conversion */ 112214c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand bld.emit(BRW_OPCODE_F32TO16, tmp16, op[0]); 112314c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand bld.emit(BRW_OPCODE_F16TO32, tmp32, tmp16); 112414c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand /* Select that or zero based on normal status */ 112514c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand inst = bld.SEL(result, zero, tmp32); 112614c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand inst->predicate = BRW_PREDICATE_NORMAL; 112714c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand inst->saturate = instr->dest.saturate; 112814c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand break; 112914c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand } 113014c46954c910efb1db94a068a866c7259deaa9d9Jason Ekstrand 
11312faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_imin: 11322faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_umin: 1133e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1134e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga case nir_op_fmin: 11350e9dc59a58e632979b3bdebb19d184bd22a0c182Matt Turner inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L); 1136112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 11372faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 11382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 11392faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_imax: 11402faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_umax: 1141e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1142e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga case nir_op_fmax: 11430e9dc59a58e632979b3bdebb19d184bd22a0c182Matt Turner inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE); 1144112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 11452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 11462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 11472faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_pack_snorm_2x16: 11482faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_pack_snorm_4x8: 11492faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_pack_unorm_2x16: 11502faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_pack_unorm_4x8: 11512faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_snorm_2x16: 11522faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_snorm_4x8: 11532faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_unorm_2x16: 
11542faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_unorm_4x8: 11552faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_half_2x16: 11562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_pack_half_2x16: 11572faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("not reached: should be handled by lower_packing_builtins"); 11582faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 11592faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_half_2x16_split_x: 1160fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); 1161112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 11622faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 11632faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_unpack_half_2x16_split_y: 1164fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); 1165112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 11662faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 11672faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 11687782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott case nir_op_pack_double_2x32_split: 11697782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott bld.emit(FS_OPCODE_PACK, result, op[0], op[1]); 11707782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott break; 11717782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott 11727782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott case nir_op_unpack_double_2x32_split_x: 1173935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga case nir_op_unpack_double_2x32_split_y: { 1174935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga /* Optimize the common case where we are unpacking from a double we have 1175935e0e305dd7a4f67557e969513a30357d308efbIago Toral 
Quiroga * previously packed. In this case we can just bypass the pack operation 1176935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga * and source directly from its arguments. 1177935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga */ 1178935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga unsigned index = (instr->op == nir_op_unpack_double_2x32_split_x) ? 0 : 1; 1179935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga if (instr->src[0].src.is_ssa) { 1180935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga nir_instr *parent_instr = instr->src[0].src.ssa->parent_instr; 1181935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga if (parent_instr->type == nir_instr_type_alu) { 1182935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr); 1183935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga if (alu_parent->op == nir_op_pack_double_2x32_split && 1184935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga alu_parent->src[index].src.is_ssa) { 1185935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga op[0] = retype(get_nir_src(alu_parent->src[index].src), 1186935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga BRW_REGISTER_TYPE_UD); 1187935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga op[0] = 1188935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga offset(op[0], bld, alu_parent->src[index].swizzle[channel]); 1189935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga bld.MOV(result, op[0]); 1190935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga break; 1191935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga } 1192935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga } 1193935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga } 11947782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott 1195935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga if (instr->op == nir_op_unpack_double_2x32_split_x) 
1196935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0)); 1197935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga else 1198935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); 11997782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott break; 1200935e0e305dd7a4f67557e969513a30357d308efbIago Toral Quiroga } 12017782f39e759798975ace6f3272dd3f263ddc8702Connor Abbott 12022faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_fpow: 1203fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]); 1204112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->saturate = instr->dest.saturate; 12052faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12062faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 12072faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_bitfield_reverse: 1208e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1209fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.BFREV(result, op[0]); 12102faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12112faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 12122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_bit_count: 1213e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1214fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.CBIT(result, op[0]); 12152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12162faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 1217de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick case nir_op_ufind_msb: { 1218de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick assert(nir_dest_bit_size(instr->dest.dest) < 64); 1219de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick 
emit_find_msb_using_lzd(bld, result, op[0], false); 1220de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick break; 1221de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick } 1222de20086eed47e6bfe7c25835d72383114f99c7a9Ian Romanick 122310adf8fc858c21cd95b3e02a8d6abee563ca1046Jason Ekstrand case nir_op_ifind_msb: { 1224e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 12252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 1226c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick if (devinfo->gen < 7) { 1227c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick emit_find_msb_using_lzd(bld, result, op[0], true); 1228c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick } else { 1229c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); 12300b19f651958c3888588190c8c8a9e701173a2aa2Matt Turner 1231c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick /* FBH counts from the MSB side, while GLSL's findMSB() wants the 1232c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * count from the LSB side. If FBH didn't return an error 1233c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB 1234c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick * count into an LSB count. 
1235c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick */ 1236c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); 1237c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick 1238c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick inst = bld.ADD(result, result, brw_imm_d(31)); 1239c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick inst->predicate = BRW_PREDICATE_NORMAL; 1240c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick inst->src[0].negate = true; 1241c2019c6c261d5c46a4e5d3edc88836bcedf75f30Ian Romanick } 12422faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12432faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 12442faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 12452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_find_lsb: 1246e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 12473e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick 12483e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick if (devinfo->gen < 7) { 12493e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick fs_reg temp = vgrf(glsl_type::int_type); 12503e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick 12513e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick /* (x & -x) generates a value that consists of only the LSB of x. 12523e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick * For all powers of 2, findMSB(y) == findLSB(y). 12533e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick */ 12543e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick fs_reg src = retype(op[0], BRW_REGISTER_TYPE_D); 12553e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick fs_reg negated_src = src; 12563e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick 12573e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick /* One must be negated, and the other must be non-negated. 
It 12583e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick * doesn't matter which is which. 12593e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick */ 12603e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick negated_src.negate = true; 12613e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick src.negate = false; 12623e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick 12633e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick bld.AND(temp, src, negated_src); 12643e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick emit_find_msb_using_lzd(bld, result, temp, false); 12653e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick } else { 12663e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick bld.FBL(result, op[0]); 12673e7cebc8da5c9f16fa1b9a25ea72b8d31c86a440Ian Romanick } 12682faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 12702faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ubitfield_extract: 12712faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ibitfield_extract: 1272b82e26a6a4d6baf121f44c61c862bfa79ba0d172Matt Turner unreachable("should have been lowered"); 1273b82e26a6a4d6baf121f44c61c862bfa79ba0d172Matt Turner case nir_op_ubfe: 1274b82e26a6a4d6baf121f44c61c862bfa79ba0d172Matt Turner case nir_op_ibfe: 1275e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1276fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.BFE(result, op[2], op[1], op[0]); 12772faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12782faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_bfm: 1279e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1280fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.BFI1(result, op[0], op[1]); 12812faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12822faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 
case nir_op_bfi: 1283e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1284fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.BFI2(result, op[0], op[1], op[2]); 12852faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12862faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 12872faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_bitfield_insert: 1288b85a229e1f542426b1c8000569d89cd4768b9339Kenneth Graunke unreachable("not reached: should have been lowered"); 12892faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 12902faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ishl: 1291e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1292fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.SHL(result, op[0], op[1]); 12932faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12942faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ishr: 1295e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1296fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.ASR(result, op[0], op[1]); 12972faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 12982faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ushr: 1299e8a8fc956358fb5e0f776b39fdbce9247bb5538aIago Toral Quiroga assert(nir_dest_bit_size(instr->dest.dest) < 64); 1300fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.SHR(result, op[0], op[1]); 13012faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 13022faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 13032faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_pack_half_2x16_split: 1304fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); 13052faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 
13062faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 13072faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_ffma: 1308fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.MAD(result, op[2], op[1], op[0]); 1309ccbe15f3325d7a6d04d0ea18227a08f53decec16Kenneth Graunke inst->saturate = instr->dest.saturate; 13102faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 13112faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 13122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_flrp: 1313fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.LRP(result, op[0], op[1], op[2]); 1314ccbe15f3325d7a6d04d0ea18227a08f53decec16Kenneth Graunke inst->saturate = instr->dest.saturate; 13152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 13162faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 13172faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_op_bcsel: 13187a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner if (optimize_frontfacing_ternary(instr, result)) 13197a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner return; 13207a997a386375a98b70ae5e1d880c8d47f236de8dMatt Turner 13213ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); 1322fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez inst = bld.SEL(result, op[1], op[2]); 1323112d738b91aac44c2509aafe68bdbf9ab74bb3c1Jason Ekstrand inst->predicate = BRW_PREDICATE_NORMAL; 13242faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 13252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 13261dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner case nir_op_extract_u8: 13271dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner case nir_op_extract_i8: { 132829ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8); 13291dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner nir_const_value *byte = 
nir_src_as_const_value(instr->src[1].src); 1330cbb0e3a7e8fffa4d5c5af8660d99cd3da8af97ecMatt Turner assert(byte != NULL); 133129ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez bld.MOV(result, subscript(op[0], type, byte->u32[0])); 13321dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner break; 13331dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner } 13341dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner 13351dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner case nir_op_extract_u16: 13361dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner case nir_op_extract_i16: { 133729ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i16); 13381dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner nir_const_value *word = nir_src_as_const_value(instr->src[1].src); 1339cbb0e3a7e8fffa4d5c5af8660d99cd3da8af97ecMatt Turner assert(word != NULL); 134029ce110be6d0d4e4df51be635810f528f7dd7f40Francisco Jerez bld.MOV(result, subscript(op[0], type, word->u32[0])); 13411dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner break; 13421dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner } 13431dc312e295c66ab8674d2f47f859e310f607b2edMatt Turner 13442faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 13452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("unhandled instruction"); 13462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 134741d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand 134841d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand /* If we need to do a boolean resolve, replace the result with -(x & 1) 134941d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand * to sign extend the low bit to 0/~0 135041d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand */ 135128e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen <= 5 && 135241d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == 
BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { 135341d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand fs_reg masked = vgrf(glsl_type::int_type); 13543ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.AND(masked, result, brw_imm_d(1)); 135541d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand masked.negate = true; 1356fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6eFrancisco Jerez bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked); 135741d64fa184671d372f6630deaf2401b00d4e984aJason Ekstrand } 13582faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 13592faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 1360864907e2f14523c130e6ff24c081789bb079bae1Connor Abbottvoid 13610ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbottfs_visitor::nir_emit_load_const(const fs_builder &bld, 13620ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott nir_load_const_instr *instr) 13630ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott{ 13640f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott const brw_reg_type reg_type = 13650f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott instr->def.bit_size == 32 ? 
BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF; 13660f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott fs_reg reg = bld.vgrf(reg_type, instr->def.num_components); 13670ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott 13680f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott switch (instr->def.bit_size) { 13690f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott case 32: 13700f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott for (unsigned i = 0; i < instr->def.num_components; i++) 13710f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i])); 13720f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott break; 13730f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott 13740f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott case 64: 13750f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott for (unsigned i = 0; i < instr->def.num_components; i++) 137687a13f598b1ecd50bc209088cf1dc60fd90df015Iago Toral Quiroga bld.MOV(offset(reg, bld, i), 137787a13f598b1ecd50bc209088cf1dc60fd90df015Iago Toral Quiroga setup_imm_df(bld, instr->value.f64[i])); 13780f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott break; 13790f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott 13800f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott default: 13810f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott unreachable("Invalid bit size"); 13820f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott } 13830ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott 13840ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott nir_ssa_values[instr->def.index] = reg; 13850ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott} 13860ecdf04060518149e99a098caf4f6025fd6482a4Connor Abbott 13872faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_reg 1388fb5dcb81cc121e4355b7eef014474a5c42a2f6dbMatt Turnerfs_visitor::get_nir_src(const nir_src &src) 13892faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 1390864907e2f14523c130e6ff24c081789bb079bae1Connor 
Abbott fs_reg reg; 1391c2abfc0b86628bb1b756e4ef125c97cb4386aea2Jason Ekstrand if (src.is_ssa) { 139212a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke if (src.ssa->parent_instr->type == nir_instr_type_ssa_undef) { 139312a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke const brw_reg_type reg_type = src.ssa->bit_size == 32 ? 139412a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF; 139512a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke reg = bld.vgrf(reg_type, src.ssa->num_components); 139612a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke } else { 139712a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke reg = nir_ssa_values[src.ssa->index]; 139812a912586f11ccbc4612532d5ceaf1bdd0cdb45aKenneth Graunke } 1399c2abfc0b86628bb1b756e4ef125c97cb4386aea2Jason Ekstrand } else { 140022c273de2b97743587310f7bbf66767191bde866Jason Ekstrand /* We don't handle indirects on locals */ 140122c273de2b97743587310f7bbf66767191bde866Jason Ekstrand assert(src.reg.indirect == NULL); 140222c273de2b97743587310f7bbf66767191bde866Jason Ekstrand reg = offset(nir_locals[src.reg.reg->index], bld, 140322c273de2b97743587310f7bbf66767191bde866Jason Ekstrand src.reg.base_offset * src.reg.reg->num_components); 1404c2abfc0b86628bb1b756e4ef125c97cb4386aea2Jason Ekstrand } 1405864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott 1406864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott /* to avoid floating-point denorm flushing problems, set the type by 1407864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott * default to D - instructions that need floating point semantics will set 1408864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott * this to F if they need to 1409864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott */ 1410864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott return retype(reg, BRW_REGISTER_TYPE_D); 14112faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 14122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor 
Abbott 141307353599e07529e98494057f556b9d96c1df5cfdMatt Turner/** 141407353599e07529e98494057f556b9d96c1df5cfdMatt Turner * Return an IMM for constants; otherwise call get_nir_src() as normal. 141507353599e07529e98494057f556b9d96c1df5cfdMatt Turner */ 141607353599e07529e98494057f556b9d96c1df5cfdMatt Turnerfs_reg 1417fb5dcb81cc121e4355b7eef014474a5c42a2f6dbMatt Turnerfs_visitor::get_nir_src_imm(const nir_src &src) 141807353599e07529e98494057f556b9d96c1df5cfdMatt Turner{ 141907353599e07529e98494057f556b9d96c1df5cfdMatt Turner nir_const_value *val = nir_src_as_const_value(src); 142007353599e07529e98494057f556b9d96c1df5cfdMatt Turner return val ? fs_reg(brw_imm_d(val->i32[0])) : get_nir_src(src); 142107353599e07529e98494057f556b9d96c1df5cfdMatt Turner} 142207353599e07529e98494057f556b9d96c1df5cfdMatt Turner 14232faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottfs_reg 1424fb5dcb81cc121e4355b7eef014474a5c42a2f6dbMatt Turnerfs_visitor::get_nir_dest(const nir_dest &dest) 14252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 1426864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott if (dest.is_ssa) { 14270f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott const brw_reg_type reg_type = 14280f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott dest.ssa.bit_size == 32 ? 
BRW_REGISTER_TYPE_F : BRW_REGISTER_TYPE_DF; 14290f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott nir_ssa_values[dest.ssa.index] = 14300f1690fd9514f7a282141a7ad57a06b334b6c1a4Connor Abbott bld.vgrf(reg_type, dest.ssa.num_components); 1431864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott return nir_ssa_values[dest.ssa.index]; 143222c273de2b97743587310f7bbf66767191bde866Jason Ekstrand } else { 143322c273de2b97743587310f7bbf66767191bde866Jason Ekstrand /* We don't handle indirects on locals */ 143422c273de2b97743587310f7bbf66767191bde866Jason Ekstrand assert(dest.reg.indirect == NULL); 143522c273de2b97743587310f7bbf66767191bde866Jason Ekstrand return offset(nir_locals[dest.reg.reg->index], bld, 143622c273de2b97743587310f7bbf66767191bde866Jason Ekstrand dest.reg.base_offset * dest.reg.reg->num_components); 1437864907e2f14523c130e6ff24c081789bb079bae1Connor Abbott } 14382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 14392faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 1440912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerezfs_reg 1441912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerezfs_visitor::get_nir_image_deref(const nir_deref_var *deref) 1442912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez{ 144318069dce4a4c3d71e6afc6b10bfa7bee0560ba9cJason Ekstrand fs_reg image(UNIFORM, deref->var->data.driver_location / 4, 1444912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez BRW_REGISTER_TYPE_UD); 14453c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand fs_reg indirect; 14463c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand unsigned indirect_max = 0; 1447912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez 1448fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri for (const nir_deref *tail = &deref->deref; tail->child; 1449fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri tail = tail->child) { 1450fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri const nir_deref_array *deref_array = nir_deref_as_array(tail->child); 
1451fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri assert(tail->child->deref_type == nir_deref_type_array); 1452fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri const unsigned size = glsl_get_length(tail->type); 1453fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri const unsigned element_size = type_size_scalar(deref_array->deref.type); 145413a04abc277089275217dce119e18acf4d4ce52dFrancisco Jerez const unsigned base = MIN2(deref_array->base_offset, size - 1); 1455fb77da89f51fd82d5cee95400acb20ad74d9e7bcTimothy Arceri image = offset(image, bld, base * element_size); 1456912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez 1457912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez if (deref_array->deref_array_type == nir_deref_array_type_indirect) { 14583c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand fs_reg tmp = vgrf(glsl_type::uint_type); 145913a04abc277089275217dce119e18acf4d4ce52dFrancisco Jerez 1460f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand /* Accessing an invalid surface index with the dataport can result 1461f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand * in a hang. According to the spec "if the index used to 1462f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand * select an individual element is negative or greater than or 1463f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand * equal to the size of the array, the results of the operation 1464f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand * are undefined but may not lead to termination" -- which is one 1465f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand * of the possible outcomes of the hang. Clamp the index to 1466f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand * prevent access outside of the array bounds. 
1467f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand */ 1468f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect), 1469f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand BRW_REGISTER_TYPE_UD), 1470f30f6e26252ed09eca1922f7c8633c7c7b6e50feJason Ekstrand brw_imm_ud(size - base - 1), BRW_CONDITIONAL_L); 147113a04abc277089275217dce119e18acf4d4ce52dFrancisco Jerez 14723c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand indirect_max += element_size * (tail->type->length - 1); 14733c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand 147418069dce4a4c3d71e6afc6b10bfa7bee0560ba9cJason Ekstrand bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); 14753c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand if (indirect.file == BAD_FILE) { 14763c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand indirect = tmp; 14773c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } else { 14783c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand bld.ADD(indirect, indirect, tmp); 14793c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } 1480912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez } 1481912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez } 1482912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez 14833c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand if (indirect.file == BAD_FILE) { 14843c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand return image; 14853c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } else { 14863c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand /* Emit a pile of MOVs to load the uniform into a temporary. The 14873c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand * dead-code elimination pass will get rid of what we don't use. 
14883c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand */ 14893c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); 14903c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { 14913c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand bld.emit(SHADER_OPCODE_MOV_INDIRECT, 14923c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand offset(tmp, bld, j), offset(image, bld, j), 14933c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand indirect, brw_imm_ud((indirect_max + 1) * 4)); 14943c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } 14953c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand return tmp; 14963c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } 1497912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez} 1498912ef52c29fdc373889594b963cc93c89fa9e3f7Francisco Jerez 14992faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 150009733f220ac9921ce7d8c3524bc5327d8203c446Francisco Jerezfs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, 150109733f220ac9921ce7d8c3524bc5327d8203c446Francisco Jerez unsigned wr_mask) 15022faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 15032faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott for (unsigned i = 0; i < 4; i++) { 15042faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott if (!((wr_mask >> i) & 1)) 15052faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott continue; 15062faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 150709733f220ac9921ce7d8c3524bc5327d8203c446Francisco Jerez fs_inst *new_inst = new(mem_ctx) fs_inst(inst); 1508f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand new_inst->dst = offset(new_inst->dst, bld, i); 15092faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott for (unsigned j = 0; j < new_inst->sources; j++) 1510b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner if (new_inst->src[j].file == VGRF) 
1511f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand new_inst->src[j] = offset(new_inst->src[j], bld, i); 15122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 151309733f220ac9921ce7d8c3524bc5327d8203c446Francisco Jerez bld.emit(new_inst); 15142faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 15152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 15162faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 1517a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez/** 1518a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez * Get the matching channel register datatype for an image intrinsic of the 1519a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez * specified GLSL image type. 1520a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez */ 1521a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerezstatic brw_reg_type 1522a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerezget_image_base_type(const glsl_type *type) 1523a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez{ 1524ac089126b9b647f930ee2657aa16ea8e8f6a5dd7Jason Ekstrand switch ((glsl_base_type)type->sampled_type) { 1525a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case GLSL_TYPE_UINT: 1526a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_REGISTER_TYPE_UD; 1527a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case GLSL_TYPE_INT: 1528a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_REGISTER_TYPE_D; 1529a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case GLSL_TYPE_FLOAT: 1530a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_REGISTER_TYPE_F; 1531a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez default: 1532a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez unreachable("Not reached."); 1533a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez } 1534a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez} 1535a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 
1536a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez/** 1537a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez * Get the appropriate atomic op for an image atomic intrinsic. 1538a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez */ 1539a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerezstatic unsigned 1540a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerezget_image_atomic_op(nir_intrinsic_op op, const glsl_type *type) 1541a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez{ 1542a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez switch (op) { 1543a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_add: 1544a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_AOP_ADD; 1545a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_min: 1546a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ? 1547a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez BRW_AOP_IMIN : BRW_AOP_UMIN); 1548a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_max: 1549a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ? 
1550a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez BRW_AOP_IMAX : BRW_AOP_UMAX); 1551a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_and: 1552a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_AOP_AND; 1553a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_or: 1554a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_AOP_OR; 1555a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_xor: 1556a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_AOP_XOR; 1557a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_exchange: 1558a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_AOP_MOV; 1559a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez case nir_intrinsic_image_atomic_comp_swap: 1560a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez return BRW_AOP_CMPWR; 1561a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez default: 1562a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez unreachable("Not reachable."); 1563a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez } 1564a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez} 1565a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 1566da361acd1c899d533caec6cae5a336f6ab35e076Neil Robertsstatic fs_inst * 1567da361acd1c899d533caec6cae5a336f6ab35e076Neil Robertsemit_pixel_interpolater_send(const fs_builder &bld, 1568da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts enum opcode opcode, 1569da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts const fs_reg &dst, 1570da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts const fs_reg &src, 1571da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts const fs_reg &desc, 1572ac1181ffbef5250cb3b651e047cce5116727c34cKenneth Graunke glsl_interp_mode interpolation) 1573da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts{ 
157418f67c8a69fcde5d3f585effeef670d0861b0730Kenneth Graunke struct brw_wm_prog_data *wm_prog_data = 1575e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke brw_wm_prog_data(bld.shader->stage_prog_data); 1576da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts fs_inst *inst; 1577da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts fs_reg payload; 1578da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts int mlen; 1579da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts 1580da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts if (src.file == BAD_FILE) { 1581da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts /* Dummy payload */ 1582da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1); 1583da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts mlen = 1; 1584da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts } else { 1585da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts payload = src; 1586da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts mlen = 2 * bld.dispatch_width() / 8; 1587da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts } 1588da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts 1589da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts inst = bld.emit(opcode, dst, payload, desc); 1590da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts inst->mlen = mlen; 1591da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts /* 2 floats per slot returned */ 15922d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = 2 * dst.component_size(inst->exec_size); 1593ac1181ffbef5250cb3b651e047cce5116727c34cKenneth Graunke inst->pi_noperspective = interpolation == INTERP_MODE_NOPERSPECTIVE; 1594da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts 159518f67c8a69fcde5d3f585effeef670d0861b0730Kenneth Graunke wm_prog_data->pulls_bary = true; 159618f67c8a69fcde5d3f585effeef670d0861b0730Kenneth Graunke 1597da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts return inst; 
1598da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts} 1599da361acd1c899d533caec6cae5a336f6ab35e076Neil Roberts 160036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke/** 160136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Computes 1 << x, given a D/UD register containing some value x. 160236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 160336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkestatic fs_reg 160436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkeintexp2(const fs_builder &bld, const fs_reg &x) 160536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke{ 160636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(x.type == BRW_REGISTER_TYPE_UD || x.type == BRW_REGISTER_TYPE_D); 160736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 160836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg result = bld.vgrf(x.type, 1); 160936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg one = bld.vgrf(x.type, 1); 161036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 16113ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.MOV(one, retype(brw_imm_d(1), one.type)); 161236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke bld.SHL(result, one, x); 161336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke return result; 161436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke} 161536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 161636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkevoid 161736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkefs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src) 161836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke{ 161936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(stage == MESA_SHADER_GEOMETRY); 162036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 1621e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); 
162236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 1623b27dfa5403ed1884999524417c08d2bc50365965Ian Romanick if (gs_compile->control_data_header_size_bits == 0) 1624b27dfa5403ed1884999524417c08d2bc50365965Ian Romanick return; 1625b27dfa5403ed1884999524417c08d2bc50365965Ian Romanick 162636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* We can only do EndPrimitive() functionality when the control data 162736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * consists of cut bits. Fortunately, the only time it isn't is when the 162836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * output type is points, in which case EndPrimitive() is a no-op. 162936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 163036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (gs_prog_data->control_data_format != 163136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) { 163236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke return; 163336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 163436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 163536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Cut bits use one bit per vertex. */ 163636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(gs_compile->control_data_bits_per_vertex == 1); 163736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 163836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg vertex_count = get_nir_src(vertex_count_nir_src); 163936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke vertex_count.type = BRW_REGISTER_TYPE_UD; 164036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 164136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting 164236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * vertex n, 0 otherwise. 
So all we need to do here is mark bit 164336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * (vertex_count - 1) % 32 in the cut_bits register to indicate that 164436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * EndPrimitive() was called after emitting vertex (vertex_count - 1); 164536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * vec4_gs_visitor::emit_control_data_bits() will take care of the rest. 164636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 164736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Note that if EndPrimitive() is called before emitting any vertices, this 164836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * will cause us to set bit 31 of the control_data_bits register to 1. 164936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * That's fine because: 165036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 165136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * - If max_vertices < 32, then vertex number 31 (zero-based) will never be 165236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * output, so the hardware will ignore cut bit 31. 165336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 165436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * - If max_vertices == 32, then vertex number 31 is guaranteed to be the 165536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * last vertex, so setting cut bit 31 has no effect (since the primitive 165636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * is automatically ended when the GS terminates). 165736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 165836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * - If max_vertices > 32, then the ir_emit_vertex visitor will reset the 165936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * control_data_bits register to 0 when the first vertex is emitted. 
166036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 166136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 166236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke const fs_builder abld = bld.annotate("end primitive"); 166336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 166436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ 166536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 16663ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); 166736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg mask = intexp2(abld, prev_count); 166836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Note: we're relying on the fact that the GEN SHL instruction only pays 166936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * attention to the lower 5 bits of its second source argument, so on this 167036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * architecture, 1 << (vertex_count - 1) is equivalent to 1 << 167136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * ((vertex_count - 1) % 32). 
167236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 167336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.OR(this->control_data_bits, this->control_data_bits, mask); 167436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke} 167536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 167636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkevoid 167736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkefs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) 167836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke{ 167936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(stage == MESA_SHADER_GEOMETRY); 168036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(gs_compile->control_data_bits_per_vertex != 0); 168136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 1682e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); 168336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 168436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke const fs_builder abld = bld.annotate("emit control data bits"); 168536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke const fs_builder fwa_bld = bld.exec_all(); 168636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 168736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* We use a single UD register to accumulate control data bits (32 bits 168836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * for each of the SIMD8 channels). So we need to write a DWord (32 bits) 168936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * at a time. 169036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 169136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Unfortunately, the URB_WRITE_SIMD8 message uses 128-bit (OWord) offsets. 
169236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * We have select a 128-bit group via the Global and Per-Slot Offsets, then 169336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * use the Channel Mask phase to enable/disable which DWord within that 169436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * group to write. (Remember, different SIMD8 channels may have emitted 169536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * different numbers of vertices, so we may need per-slot offsets.) 169636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 169736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Channel masking presents an annoying problem: we may have to replicate 169836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * the data up to 4 times: 169936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 170036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Msg = Handles, Per-Slot Offsets, Channel Masks, Data, Data, Data, Data. 170136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 170236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * To avoid penalizing shaders that emit a small number of vertices, we 170336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * can avoid these sometimes: if the size of the control data header is 170436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * <= 128 bits, then there is only 1 OWord. All SIMD8 channels will land 170536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * land in the same 128-bit group, so we can skip per-slot offsets. 170636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 170736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Similarly, if the control data header is <= 32 bits, there is only one 170836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * DWord, so we can skip channel masks. 
170936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 171036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke enum opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8; 171136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 171236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg channel_mask, per_slot_offset; 171336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 171436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (gs_compile->control_data_header_size_bits > 32) { 171536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED; 171636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke channel_mask = vgrf(glsl_type::uint_type); 171736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 171836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 171936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (gs_compile->control_data_header_size_bits > 128) { 172036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT; 172136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke per_slot_offset = vgrf(glsl_type::uint_type); 172236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 172336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 172436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Figure out which DWord we're trying to write to using the formula: 172536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 172636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * dword_index = (vertex_count - 1) * bits_per_vertex / 32 172736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 172836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Since bits_per_vertex is a power of two, and is known at compile 172936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * time, this can be optimized to: 173036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 
173136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) 173236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 173336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) { 173436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 173536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 17363ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); 173736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke unsigned log2_bits_per_vertex = 1738027cbf00f248bda325521db8f56a3718898da46bMathias Fröhlich util_last_bit(gs_compile->control_data_bits_per_vertex); 17393ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.SHR(dword_index, prev_count, brw_imm_ud(6u - log2_bits_per_vertex)); 174036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 174136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (per_slot_offset.file != BAD_FILE) { 174236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Set the per-slot offset to dword_index / 4, so that we'll write to 174336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * the appropriate OWord within the control data header. 174436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 17453ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.SHR(per_slot_offset, dword_index, brw_imm_ud(2u)); 174636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 174736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 174836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Set the channel masks to 1 << (dword_index % 4), so that we'll 174936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * write to the appropriate DWORD within the OWORD. 
175036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 175136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 17523ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner fwa_bld.AND(channel, dword_index, brw_imm_ud(3u)); 175336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke channel_mask = intexp2(fwa_bld, channel); 175436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Then the channel masks need to be in bits 23:16. */ 17553ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner fwa_bld.SHL(channel_mask, channel_mask, brw_imm_ud(16u)); 175636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 175736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 175836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Store the control data bits in the message payload and send it. */ 175936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke int mlen = 2; 176036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (channel_mask.file != BAD_FILE) 176136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke mlen += 4; /* channel masks, plus 3 extra copies of the data */ 176236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (per_slot_offset.file != BAD_FILE) 176336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke mlen++; 176436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 176536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen); 176636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen); 176736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke int i = 0; 176836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); 176936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (per_slot_offset.file != BAD_FILE) 177036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke sources[i++] = 
per_slot_offset; 177136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (channel_mask.file != BAD_FILE) 177236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke sources[i++] = channel_mask; 177336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke while (i < mlen) { 177436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke sources[i++] = this->control_data_bits; 177536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 177636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 177736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.LOAD_PAYLOAD(payload, sources, mlen, mlen); 177836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_inst *inst = abld.emit(opcode, reg_undef, payload); 177936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke inst->mlen = mlen; 178036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* We need to increment Global Offset by 256-bits to make room for 178136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Broadwell's extra "Vertex Count" payload at the beginning of the 178236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * URB entry. Since this is an OWord message, Global Offset is counted 178336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * in 128-bit units, so we must set it to 2. 
178436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 178536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (gs_prog_data->static_vertex_count == -1) 178636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke inst->offset = 2; 178736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke} 178836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 178936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkevoid 179036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkefs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count, 179136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke unsigned stream_id) 179236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke{ 179336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */ 179436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 179536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Note: we are calling this *before* increasing vertex_count, so 179636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * this->vertex_count == vertex_count - 1 in the formula above. 
179736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 179836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 179936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Stream mode uses 2 bits per vertex */ 180036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(gs_compile->control_data_bits_per_vertex == 2); 180136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 180236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Must be a valid stream */ 180336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS); 180436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 180536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Control data bits are initialized to 0 so we don't have to set any 180636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * bits when sending vertices to stream 0. 180736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 180836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (stream_id == 0) 180936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke return; 181036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 181136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke const fs_builder abld = bld.annotate("set stream control data bits", NULL); 181236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 181336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* reg::sid = stream_id */ 181436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 18153ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.MOV(sid, brw_imm_ud(stream_id)); 181636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 181736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* reg:shift_count = 2 * (vertex_count - 1) */ 181836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 18193ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner 
abld.SHL(shift_count, vertex_count, brw_imm_ud(1u)); 182036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 182136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Note: we're relying on the fact that the GEN SHL instruction only pays 182236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * attention to the lower 5 bits of its second source argument, so on this 182336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to 182436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * stream_id << ((2 * (vertex_count - 1)) % 32). 182536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 182636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 182736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.SHL(mask, sid, shift_count); 182836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.OR(this->control_data_bits, this->control_data_bits, mask); 182936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke} 183036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 183136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkevoid 183236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkefs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, 183336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke unsigned stream_id) 183436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke{ 183536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke assert(stage == MESA_SHADER_GEOMETRY); 183636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 1837e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); 183836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 183936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_reg vertex_count = get_nir_src(vertex_count_nir_src); 184036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 
vertex_count.type = BRW_REGISTER_TYPE_UD; 184136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 184236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Haswell and later hardware ignores the "Render Stream Select" bits 184336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled, 184436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * and instead sends all primitives down the pipeline for rasterization. 184536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * If the SOL stage is enabled, "Render Stream Select" is honored and 184636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * primitives bound to non-zero streams are discarded after stream output. 184736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 184836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Since the only purpose of primives sent to non-zero streams is to 184936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * be recorded by transform feedback, we can simply discard all geometry 185036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * bound to these streams when transform feedback is disabled. 185136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 1852e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri if (stream_id > 0 && !nir->info->has_transform_feedback_varyings) 185336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke return; 185436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 185536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* If we're outputting 32 control data bits or less, then we can wait 185636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * until the shader is over to output them all. Otherwise we need to 185736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * output them as we go. 
Now is the time to do it, since we're about to 185836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * output the vertex_count'th vertex, so it's guaranteed that the 185936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * control data bits associated with the (vertex_count - 1)th vertex are 186036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * correct. 186136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 186236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (gs_compile->control_data_header_size_bits > 32) { 186336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke const fs_builder abld = 186436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke bld.annotate("emit vertex: emit control data bits"); 186536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 186636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Only emit control data bits if we've finished accumulating a batch 186736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * of 32 bits. This is the case when: 186836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 186936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * (vertex_count * bits_per_vertex) % 32 == 0 187036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 187136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * (in other words, when the last 5 bits of vertex_count * 187236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * bits_per_vertex are 0). 
Assuming bits_per_vertex == 2^n for some 187336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * integer n (which is always the case, since bits_per_vertex is 187436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * always 1 or 2), this is equivalent to requiring that the last 5-n 187536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * bits of vertex_count are 0: 187636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 187736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * vertex_count & (2^(5-n) - 1) == 0 187836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 187936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is 188036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * equivalent to: 188136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 188236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * vertex_count & (32 / bits_per_vertex - 1) == 0 188336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 188436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * TODO: If vertex_count is an immediate, we could do some of this math 188536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * at compile time... 
188636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 188736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke fs_inst *inst = 188836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.AND(bld.null_reg_d(), vertex_count, 18893ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_ud(32u / gs_compile->control_data_bits_per_vertex - 1u)); 189036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke inst->conditional_mod = BRW_CONDITIONAL_Z; 189136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 189236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.IF(BRW_PREDICATE_NORMAL); 189336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* If vertex_count is 0, then no control data bits have been 189436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * accumulated yet, so we can skip emitting them. 189536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 18963ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner abld.CMP(bld.null_reg_d(), vertex_count, brw_imm_ud(0u), 189736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke BRW_CONDITIONAL_NEQ); 189836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.IF(BRW_PREDICATE_NORMAL); 189936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke emit_gs_control_data_bits(vertex_count); 190036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.emit(BRW_OPCODE_ENDIF); 190136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 190236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* Reset control_data_bits to 0 so we can start accumulating a new 190336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * batch. 
190436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * 190536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * Note: in the case where vertex_count == 0, this neutralizes the 190636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * effect of any call to EndPrimitive() that the shader may have 190736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * made before outputting its first vertex. 190836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 19093ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner inst = abld.MOV(this->control_data_bits, brw_imm_ud(0u)); 191036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke inst->force_writemask_all = true; 191136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke abld.emit(BRW_OPCODE_ENDIF); 191236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 191336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 191436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke emit_urb_writes(vertex_count); 191536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 191636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke /* In stream mode we have to set control data bits for all vertices 191736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * unless we have disabled control data bits completely (which we do 191836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke * do for GL_POINTS outputs that don't use streams). 
191936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke */ 192036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke if (gs_compile->control_data_header_size_bits > 0 && 192136fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke gs_prog_data->control_data_format == 192236fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { 192336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke set_gs_stream_control_data_bits(vertex_count, stream_id); 192436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 192536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke} 192636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 192736fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkevoid 192836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunkefs_visitor::emit_gs_input_load(const fs_reg &dst, 192936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke const nir_src &vertex_src, 193078b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand unsigned base_offset, 193178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand const nir_src &offset_src, 1932a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri unsigned num_components, 1933a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri unsigned first_component) 193436fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke{ 1935e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); 193636fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 193778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); 193878b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *offset_const = nir_src_as_const_value(offset_src); 193978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; 194078b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand 
1941fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y], 1942fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only 1943fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * gl_PointSize is available as a GS input, however, so it must be that. 1944fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke */ 194578b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand const bool is_point_size = (base_offset == 0); 1946fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke 19471c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke /* TODO: figure out push input layout for invocations == 1 */ 19481c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke if (gs_prog_data->invocations == 1 && 19491c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke offset_const != NULL && vertex_const != NULL && 1950084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga 4 * (base_offset + offset_const->u32[0]) < push_reg_count) { 1951084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga int imm_offset = (base_offset + offset_const->u32[0]) * 4 + 1952084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga vertex_const->u32[0] * push_reg_count; 1953fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* This input was pushed into registers. 
*/ 1954fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke if (is_point_size) { 1955fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* gl_PointSize comes in .w */ 1956fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type)); 1957fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke } else { 1958fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke for (unsigned i = 0; i < num_components; i++) { 1959fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke bld.MOV(offset(dst, bld, i), 1960c4be6e0b8d91746eccf334b9e20861af4036d06aKenneth Graunke fs_reg(ATTR, imm_offset + i + first_component, dst.type)); 1961fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke } 1962fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke } 19635fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke return; 19645fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke } 196536fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 19665fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke /* Resort to the pull model. Ensure the VUE handles are provided. */ 19675fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke gs_prog_data->base.include_vue_handles = true; 196836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 19695fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2; 19701c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 19715fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke 19725fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke if (gs_prog_data->invocations == 1) { 1973fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke if (vertex_const) { 1974fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* The vertex index is constant; just select the proper URB handle. 
*/ 1975fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke icp_handle = 1976084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0), 1977fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke BRW_REGISTER_TYPE_UD); 197836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } else { 1979fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* The vertex index is non-constant. We need to use indirect 1980fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * addressing to fetch the proper URB handle. 1981fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * 1982fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0> 1983fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * indicating that channel <n> should read the handle from 1984fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * DWord <n>. We convert that to bytes by multiplying by 4. 1985fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * 1986fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * Next, we convert the vertex index to bytes by multiplying 1987fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * by 32 (shifting by 5), and add the two together. This is 1988fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * the final indirect byte offset. 
1989fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke */ 1990fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_W, 1); 1991fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 1992fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 1993fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 1994fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke 1995fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */ 1996fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210))); 1997fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */ 19983ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.SHL(channel_offsets, sequence, brw_imm_ud(2u)); 1999fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* Convert vertex_index to bytes (multiply by 32) */ 2000fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke bld.SHL(vertex_offset_bytes, 2001fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), 2002fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke brw_imm_ud(5u)); 2003fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets); 2004fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke 2005fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke /* Use first_icp_handle as the base offset. There is one register 2006fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke * of URB handles per vertex, so inform the register allocator that 2007e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri * we might read up to nir->info->gs.vertices_in registers. 
2008fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke */ 2009fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, 2010e1e27b0917249448a481b6681aac375505f728c3Samuel Iglesias Gonsálvez retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type), 2011fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke fs_reg(icp_offset_bytes), 2012e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri brw_imm_ud(nir->info->gs.vertices_in * REG_SIZE)); 201336fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 20141c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke } else { 20151c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke assert(gs_prog_data->invocations > 1); 20161c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke 20171c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke if (vertex_const) { 20181c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke assert(devinfo->gen >= 9 || vertex_const->i32[0] <= 5); 20191c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke bld.MOV(icp_handle, 20201c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke retype(brw_vec1_grf(first_icp_handle + 20211c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke vertex_const->i32[0] / 8, 20221c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke vertex_const->i32[0] % 8), 20231c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke BRW_REGISTER_TYPE_UD)); 20241c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke } else { 20251c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke /* The vertex index is non-constant. We need to use indirect 20261c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke * addressing to fetch the proper URB handle. 
20271c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke * 20281c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke */ 20291c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 20301c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke 20311c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke /* Convert vertex_index to bytes (multiply by 4) */ 20321c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke bld.SHL(icp_offset_bytes, 20331c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), 20341c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke brw_imm_ud(2u)); 20351c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke 20361c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke /* Use first_icp_handle as the base offset. There is one DWord 20371c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke * of URB handles per vertex, so inform the register allocator that 2038e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri * we might read up to ceil(nir->info->gs.vertices_in / 8) registers. 
20391c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke */ 20401c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, 2041e1e27b0917249448a481b6681aac375505f728c3Samuel Iglesias Gonsálvez retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type), 20421c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke fs_reg(icp_offset_bytes), 2043e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri brw_imm_ud(DIV_ROUND_UP(nir->info->gs.vertices_in, 8) * 20441c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke REG_SIZE)); 20451c41cb58def637c9e033cb7bf108f1096c9ae63cKenneth Graunke } 20465fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke } 2047fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke 20485fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke fs_inst *inst; 20492b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 20502b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez fs_reg tmp_dst = dst; 20512b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez fs_reg indirect_offset = get_nir_src(offset_src); 20522b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez unsigned num_iterations = 1; 20532b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez unsigned orig_num_components = num_components; 20542b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 20552b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez if (type_sz(dst.type) == 8) { 20562b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez if (num_components > 2) { 20572b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez num_iterations = 2; 20582b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez num_components = 2; 20592b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } 20602b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), dst.type); 
20612b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez tmp_dst = tmp; 20628b80e9c31db62ccf54ab593b47016ea514dec81cTimothy Arceri first_component = first_component / 2; 20632b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } 20642b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 20652b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez for (unsigned iter = 0; iter < num_iterations; iter++) { 20662b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez if (offset_const) { 20672b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez /* Constant indexing - use global offset. */ 2068a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri if (first_component != 0) { 2069a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri unsigned read_components = num_components + first_component; 2070a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri fs_reg tmp = bld.vgrf(dst.type, read_components); 2071a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle); 20722d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = read_components * 20732d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez tmp.component_size(inst->exec_size); 2074a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri for (unsigned i = 0; i < num_components; i++) { 2075a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri bld.MOV(offset(tmp_dst, bld, i), 2076a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri offset(tmp, bld, i + first_component)); 2077a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri } 2078a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri } else { 2079a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp_dst, 2080a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri icp_handle); 20812d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = 
num_components * 20822d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez tmp_dst.component_size(inst->exec_size); 2083a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri } 20842b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez inst->offset = base_offset + offset_const->u32[0]; 20852b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez inst->mlen = 1; 20862b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } else { 20872b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez /* Indirect indexing - use per-slot offsets as well. */ 20882b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez const fs_reg srcs[] = { icp_handle, indirect_offset }; 20899d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri unsigned read_components = num_components + first_component; 20909d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri fs_reg tmp = bld.vgrf(dst.type, read_components); 20912b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); 20922b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); 20939d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri if (first_component != 0) { 20949d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, 20959d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri payload); 20962d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = read_components * 20972d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez tmp.component_size(inst->exec_size); 20989d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri for (unsigned i = 0; i < num_components; i++) { 20999d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri bld.MOV(offset(tmp_dst, bld, i), 21009d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri offset(tmp, bld, i + first_component)); 
21019d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri } 21029d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri } else { 21039d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp_dst, 21049d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri payload); 21052d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = num_components * 21062d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez tmp_dst.component_size(inst->exec_size); 21079d9b0b54cdc212c372ac67cc14d7ba1a16cc69efTimothy Arceri } 21082b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez inst->offset = base_offset; 21092b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez inst->mlen = 2; 21102b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } 21112b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 21122b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez if (type_sz(dst.type) == 8) { 21132b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez shuffle_32bit_load_result_to_64bit_data( 21142b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez bld, tmp_dst, retype(tmp_dst, BRW_REGISTER_TYPE_F), num_components); 21152b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 21162b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez for (unsigned c = 0; c < num_components; c++) 21172b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez bld.MOV(offset(dst, bld, iter * 2 + c), offset(tmp_dst, bld, c)); 21182b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } 21192b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 21202b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez if (num_iterations > 1) { 21212b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez num_components = orig_num_components - 2; 21222b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez 
if(offset_const) { 21232b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez base_offset++; 21242b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } else { 21252b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez fs_reg new_indirect = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 21262b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez bld.ADD(new_indirect, indirect_offset, brw_imm_ud(1u)); 21272b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez indirect_offset = new_indirect; 21282b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } 21292b648ec17c2934802dd56452d11d78ec2d525a06Samuel Iglesias Gonsálvez } 21305fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke } 2131fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke 21325fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke if (is_point_size) { 21335fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke /* Read the whole VUE header (because of alignment) and read .w. 
*/ 21345fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke fs_reg tmp = bld.vgrf(dst.type, 4); 21355fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke inst->dst = tmp; 213669570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = 4 * REG_SIZE; 21375fc37726501bc65f3bbaef2573ac89e980f1a412Kenneth Graunke bld.MOV(dst, offset(tmp, bld, 3)); 213836fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke } 213936fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke} 214036fd65381756ed1b8f774f7fcdd555941a3d39e1Kenneth Graunke 2141a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunkefs_reg 2142a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunkefs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) 2143a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke{ 2144a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke nir_src *offset_src = nir_get_io_offset_src(instr); 2145a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke nir_const_value *const_value = nir_src_as_const_value(*offset_src); 2146a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 2147a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke if (const_value) { 2148a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke /* The only constant offset we should find is 0. brw_nir.c's 2149a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke * add_const_offset_to_base() will fold other constant offsets 2150a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke * into instr->const_index[0]. 
2151a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke */ 2152084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga assert(const_value->u32[0] == 0); 2153a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke return fs_reg(); 2154a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke } 2155a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 2156a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke return get_nir_src(*offset_src); 2157a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke} 2158a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 21596eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quirogastatic void 21606eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quirogado_untyped_vector_read(const fs_builder &bld, 21616eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga const fs_reg dest, 21626eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga const fs_reg surf_index, 21636eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga const fs_reg offset_reg, 21646eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga unsigned num_components) 21656eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga{ 21666eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga if (type_sz(dest.type) == 4) { 21676eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, 21686eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 1 /* dims */, 21696eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga num_components, 21706eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga BRW_PREDICATE_NONE); 21716eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga read_result.type = dest.type; 21726eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga for (unsigned i = 0; i < num_components; i++) 21736eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); 
21746eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga } else if (type_sz(dest.type) == 8) { 21756eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga /* Reading a dvec, so we need to: 21766eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * 21776eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * 1. Multiply num_components by 2, to account for the fact that we 21786eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * need to read 64-bit components. 21796eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * 2. Shuffle the result of the load to form valid 64-bit elements 21806eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * 3. Emit a second load (for components z/w) if needed. 21816eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga */ 21826eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); 21836eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga bld.MOV(read_offset, offset_reg); 21846eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 21856eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga int iters = num_components <= 2 ? 
1 : 2; 21866eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 21876eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga /* Load the dvec, the first iteration loads components x/y, the second 21886eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * iteration, if needed, loads components z/w 21896eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga */ 21906eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga for (int it = 0; it < iters; it++) { 21916eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga /* Compute number of components to read in this iteration */ 21926eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga int iter_components = MIN2(2, num_components); 21936eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga num_components -= iter_components; 21946eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 21956eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga /* Read. Since this message reads 32-bit components, we need to 21966eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga * read twice as many components. 
21976eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga */ 21986eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset, 21996eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 1 /* dims */, 22006eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga iter_components * 2, 22016eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga BRW_PREDICATE_NONE); 22026eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 22036eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga /* Shuffle the 32-bit load result into valid 64-bit data */ 22046eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga const fs_reg packed_result = bld.vgrf(dest.type, iter_components); 22056eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga shuffle_32bit_load_result_to_64bit_data( 22066eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga bld, packed_result, read_result, iter_components); 22076eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 22086eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga /* Move each component to its destination */ 22096eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga read_result = retype(read_result, BRW_REGISTER_TYPE_DF); 22106eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga for (int c = 0; c < iter_components; c++) { 22116eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga bld.MOV(offset(dest, bld, it * 2 + c), 22126eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga offset(packed_result, bld, c)); 22136eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga } 22146eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 22156eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga bld.ADD(read_offset, read_offset, brw_imm_ud(16)); 22166eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga } 22176eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga } else { 22186eab06b866916d4fd52adf7b8bb6113948a3811aIago 
Toral Quiroga unreachable("Unsupported type"); 22196eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga } 22206eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga} 22216eab06b866916d4fd52adf7b8bb6113948a3811aIago Toral Quiroga 22222faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 2223918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkefs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, 2224918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_intrinsic_instr *instr) 22252faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 2226918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(stage == MESA_SHADER_VERTEX); 2227918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 22282faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott fs_reg dest; 22292faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott if (nir_intrinsic_infos[instr->intrinsic].has_dest) 22302faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott dest = get_nir_dest(instr->dest); 22312faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 2232918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke switch (instr->intrinsic) { 2233918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_vertex_id: 2234918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke unreachable("should be lowered by lower_vertex_id()"); 2235918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2236918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_vertex_id_zero_base: 2237918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_base_vertex: 223817ebb55a14b5a9aa639845fbda9330ef9421834aKristian Høgsberg Kristensen case nir_intrinsic_load_instance_id: 2239cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen case nir_intrinsic_load_base_instance: 2240cddfc2cefa93b884c40329dcb193fe4fb22143abKristian Høgsberg Kristensen case nir_intrinsic_load_draw_id: { 2241918bda23dda36004c95f6441328ecc892e068886Kenneth 
Graunke gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); 2242918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg val = nir_system_values[sv]; 2243918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(val.file != BAD_FILE); 2244918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest.type = val.type; 2245918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(dest, val); 2246918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2247918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 2248918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 22493dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke case nir_intrinsic_load_input: { 22503dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type); 22517f53fead5cf9a85c74a94d359dd5fccfbb87856cTimothy Arceri unsigned first_component = nir_intrinsic_component(instr); 22523dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke unsigned num_components = instr->num_components; 22533dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke enum brw_reg_type type = dest.type; 22543dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke 22553dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 22563dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke assert(const_offset && "Indirect input loads not allowed"); 22573dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke src = offset(src, bld, const_offset->u32[0]); 22583dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke 22593dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke for (unsigned j = 0; j < num_components; j++) { 22607f53fead5cf9a85c74a94d359dd5fccfbb87856cTimothy Arceri bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component)); 22613dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke } 22623dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke 
22633dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke if (type == BRW_REGISTER_TYPE_DF) { 22643dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke /* Once the double vector is read, set again its original register 22653dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke * type to continue with normal execution. 22663dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke */ 22673dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke src = retype(src, type); 22683dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke dest = retype(dest, type); 22693dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke } 22703dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke 22713dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke if (type_sz(src.type) == 8) { 22723dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke shuffle_32bit_load_result_to_64bit_data(bld, 22733dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke dest, 22743dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke retype(dest, BRW_REGISTER_TYPE_F), 22753dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke instr->num_components); 22763dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke } 22773dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke break; 22783dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke } 22793dba8516d6468866f2534f517358a6243eb0995eKenneth Graunke 2280918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke default: 2281918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_intrinsic(bld, instr); 2282918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2283918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 2284918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke} 2285918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2286918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkevoid 22877d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunkefs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, 
22887d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke nir_intrinsic_instr *instr) 22897d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke{ 22907d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke assert(stage == MESA_SHADER_TESS_CTRL); 22917d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key; 2292e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); 22937d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 22947d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg dst; 22957d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (nir_intrinsic_infos[instr->intrinsic].has_dest) 22967d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke dst = get_nir_dest(instr->dest); 22977d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 22987d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke switch (instr->intrinsic) { 22997d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_primitive_id: 23007d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.MOV(dst, fs_reg(brw_vec1_grf(0, 1))); 23017d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 23027d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_invocation_id: 23037d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.MOV(retype(dst, invocation_id.type), invocation_id); 23047d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 23057d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_patch_vertices_in: 23067d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.MOV(retype(dst, BRW_REGISTER_TYPE_D), 23077d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke brw_imm_d(tcs_key->input_vertices)); 23087d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 23097d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 
23107d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_barrier: { 23117d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (tcs_prog_data->instances == 1) 23127d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 23137d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23147d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 2315159f0377556c45630cdc0721b193f34217a329b0Kenneth Graunke fs_reg m0_2 = component(m0, 2); 23167d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 2317159f0377556c45630cdc0721b193f34217a329b0Kenneth Graunke const fs_builder chanbld = bld.exec_all().group(1, 0); 23187d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23197d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Zero the message header */ 2320159f0377556c45630cdc0721b193f34217a329b0Kenneth Graunke bld.exec_all().MOV(m0, brw_imm_ud(0u)); 23217d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23227d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Copy "Barrier ID" from r0.2, bits 16:13 */ 2323159f0377556c45630cdc0721b193f34217a329b0Kenneth Graunke chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), 23247d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke brw_imm_ud(INTEL_MASK(16, 13))); 23257d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23267d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Shift it up to bits 27:24. 
*/ 2327159f0377556c45630cdc0721b193f34217a329b0Kenneth Graunke chanbld.SHL(m0_2, m0_2, brw_imm_ud(11)); 23287d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23297d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Set the Barrier Count and the enable bit */ 2330159f0377556c45630cdc0721b193f34217a329b0Kenneth Graunke chanbld.OR(m0_2, m0_2, 2331d14dd727f4aded5bd34a78dc2c81374a78114440Kenneth Graunke brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15))); 23327d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23337d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0); 23347d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 23357d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 23367d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23377d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_input: 23387d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unreachable("nir_lower_io should never give us these."); 23397d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 23407d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23417d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_per_vertex_input: { 23427d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg indirect_offset = get_indirect_offset(instr); 23437d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unsigned imm_offset = instr->const_index[0]; 23447d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23457d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke const nir_src &vertex_src = instr->src[0]; 23467d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); 23477d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23487d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_inst *inst; 23497d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 
23507d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg icp_handle; 23517d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23527d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (vertex_const) { 23537d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Emit a MOV to resolve <0,1,0> regioning. */ 23547d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 23557d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.MOV(icp_handle, 23567d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke retype(brw_vec1_grf(1 + (vertex_const->i32[0] >> 3), 23577d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke vertex_const->i32[0] & 7), 23587d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke BRW_REGISTER_TYPE_UD)); 23597d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } else if (tcs_prog_data->instances == 1 && 23607d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke vertex_src.is_ssa && 23617d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke vertex_src.ssa->parent_instr->type == nir_instr_type_intrinsic && 23627d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke nir_instr_as_intrinsic(vertex_src.ssa->parent_instr)->intrinsic == nir_intrinsic_load_invocation_id) { 23637d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* For the common case of only 1 instance, an array index of 23647d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke * gl_InvocationID means reading g1. Skip all the indirect work. 23657d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke */ 23667d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke icp_handle = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD); 23677d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } else { 23687d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* The vertex index is non-constant. We need to use indirect 23697d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke * addressing to fetch the proper URB handle. 
23707d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke */ 23717d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 23727d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23737d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Each ICP handle is a single DWord (4 bytes) */ 23747d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 23757d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.SHL(vertex_offset_bytes, 23767d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), 23777d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke brw_imm_ud(2u)); 23787d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 23797d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Start at g1. We might read up to 4 registers. */ 23807d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, 2381e1e27b0917249448a481b6681aac375505f728c3Samuel Iglesias Gonsálvez retype(brw_vec8_grf(1, 0), icp_handle.type), vertex_offset_bytes, 23827d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke brw_imm_ud(4 * REG_SIZE)); 23837d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 23847d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 2385cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga /* We can only read two double components with each URB read, so 2386cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * we send two read messages in that case, each one loading up to 2387cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * two double components. 
2388cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga */ 2389cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga unsigned num_iterations = 1; 2390cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga unsigned num_components = instr->num_components; 239127e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri unsigned first_component = nir_intrinsic_component(instr); 2392cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga fs_reg orig_dst = dst; 2393cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga if (type_sz(dst.type) == 8) { 239427e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri first_component = first_component / 2; 2395cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga if (instr->num_components > 2) { 2396cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga num_iterations = 2; 2397cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga num_components = 2; 2398cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } 2399cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga 2400cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), dst.type); 2401cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga dst = tmp; 24027d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 24037d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 2404cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga for (unsigned iter = 0; iter < num_iterations; iter++) { 2405cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga if (indirect_offset.file == BAD_FILE) { 2406cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga /* Constant indexing - use global offset. 
*/ 2407cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri if (first_component != 0) { 2408cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri unsigned read_components = num_components + first_component; 2409cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri fs_reg tmp = bld.vgrf(dst.type, read_components); 2410cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle); 2411cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri for (unsigned i = 0; i < num_components; i++) { 2412cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri bld.MOV(offset(dst, bld, i), 2413cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri offset(tmp, bld, i + first_component)); 2414cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri } 2415cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri } else { 2416cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); 2417cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri } 2418cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga inst->offset = imm_offset; 2419cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga inst->mlen = 1; 2420cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } else { 2421cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga /* Indirect indexing - use per-slot offsets as well. 
*/ 2422cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga const fs_reg srcs[] = { icp_handle, indirect_offset }; 2423cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); 2424cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); 24252477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri if (first_component != 0) { 24262477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri unsigned read_components = num_components + first_component; 24272477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri fs_reg tmp = bld.vgrf(dst.type, read_components); 24282477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, 24292477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri payload); 24302477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri for (unsigned i = 0; i < num_components; i++) { 24312477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri bld.MOV(offset(dst, bld, i), 24322477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri offset(tmp, bld, i + first_component)); 24332477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri } 24342477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri } else { 24352477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, 24362477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri payload); 24372477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri } 2438cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga inst->offset = imm_offset; 2439cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga inst->mlen = 2; 2440cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } 24412d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = (num_components + first_component) * 24422d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->dst.component_size(inst->exec_size); 
2443cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga 2444cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga /* If we are reading 64-bit data using 32-bit read messages we need 2445cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * build proper 64-bit data elements by shuffling the low and high 2446cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * 32-bit components around like we do for other things like UBOs 2447cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * or SSBOs. 2448cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga */ 2449cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga if (type_sz(dst.type) == 8) { 2450cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga shuffle_32bit_load_result_to_64bit_data( 2451cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga bld, dst, retype(dst, BRW_REGISTER_TYPE_F), num_components); 2452cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga 2453cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga for (unsigned c = 0; c < num_components; c++) { 2454cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga bld.MOV(offset(orig_dst, bld, iter * 2 + c), 2455cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga offset(dst, bld, c)); 2456cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } 2457cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } 2458cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga 2459cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga /* Copy the temporary to the destination to deal with writemasking. 2460cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * 2461cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * Also attempt to deal with gl_PointSize being in the .w component. 
2462cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga */ 2463cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { 2464cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga assert(type_sz(dst.type) < 8); 2465cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga inst->dst = bld.vgrf(dst.type, 4); 246669570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = 4 * REG_SIZE; 2467cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga bld.MOV(dst, offset(inst->dst, bld, 3)); 2468cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } 2469cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga 2470cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga /* If we are loading double data and we need a second read message 2471cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga * adjust the write offset 2472cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga */ 2473cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga if (num_iterations > 1) { 2474cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga num_components = instr->num_components - 2; 24751737e75bfb85eb22a30e4f1c69a825b3abd946f6Iago Toral Quiroga imm_offset++; 2476cda3435ea85904a17c5c23a7c044e59ba0181b96Iago Toral Quiroga } 24777d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 24787d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 24797d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 24807d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 24817d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_output: 24827d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_load_per_vertex_output: { 24837d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg indirect_offset = get_indirect_offset(instr); 24847d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unsigned imm_offset = instr->const_index[0]; 
2485ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri unsigned first_component = nir_intrinsic_component(instr); 24867d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 24877d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_inst *inst; 24887d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (indirect_offset.file == BAD_FILE) { 24897d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Replicate the patch handle to all enabled channels */ 24907d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 24917d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.MOV(patch_handle, 24927d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); 24937d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 2494e6ae19944d977dc91bc45adff679337182c20683Kenneth Graunke { 2495ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri if (first_component != 0) { 2496ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri unsigned read_components = 2497ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri instr->num_components + first_component; 2498ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri fs_reg tmp = bld.vgrf(dst.type, read_components); 2499ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, 2500ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri patch_handle); 250169570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = read_components * REG_SIZE; 2502ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri for (unsigned i = 0; i < instr->num_components; i++) { 2503ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri bld.MOV(offset(dst, bld, i), 2504ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri offset(tmp, bld, i + first_component)); 2505ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri } 2506ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri } 
else { 2507ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, 2508ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri patch_handle); 250969570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = instr->num_components * REG_SIZE; 2510ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri } 25117d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke inst->offset = imm_offset; 25127d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke inst->mlen = 1; 25137d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 25147d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } else { 25157d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke /* Indirect indexing - use per-slot offsets as well. */ 25167d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke const fs_reg srcs[] = { 25177d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), 25187d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke indirect_offset 25197d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke }; 25207d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); 25217d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); 2522ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri if (first_component != 0) { 2523ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri unsigned read_components = 2524ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri instr->num_components + first_component; 2525ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri fs_reg tmp = bld.vgrf(dst.type, read_components); 2526ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, 2527ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri payload); 252869570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = 
read_components * REG_SIZE; 2529ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri for (unsigned i = 0; i < instr->num_components; i++) { 2530ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri bld.MOV(offset(dst, bld, i), 2531ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri offset(tmp, bld, i + first_component)); 2532ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri } 2533ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri } else { 2534ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, 2535ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri payload); 253669570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = instr->num_components * REG_SIZE; 2537ad5dd39984467b29d20e03ec8bd26f6f1d2e97adTimothy Arceri } 25387d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke inst->offset = imm_offset; 25397d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke inst->mlen = 2; 25407d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 25417d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 25427d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 25437d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 25447d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_store_output: 25457d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke case nir_intrinsic_store_per_vertex_output: { 25467d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg value = get_nir_src(instr->src[0]); 254761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga bool is_64bit = (instr->src[0].is_ssa ? 
254861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size) == 64; 25497d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg indirect_offset = get_indirect_offset(instr); 25507d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unsigned imm_offset = instr->const_index[0]; 25517d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unsigned swiz = BRW_SWIZZLE_XYZW; 25527d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unsigned mask = instr->const_index[1]; 25537d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke unsigned header_regs = 0; 25547d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke fs_reg srcs[7]; 25557d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke srcs[header_regs++] = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD); 25567d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 25577d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (indirect_offset.file != BAD_FILE) { 25587d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke srcs[header_regs++] = indirect_offset; 25597d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 25607d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 25617d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke if (mask == 0) 25627d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 25637d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 2564027cbf00f248bda325521db8f56a3718898da46bMathias Fröhlich unsigned num_components = util_last_bit(mask); 25657d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke enum opcode opcode; 25667d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 256761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* We can only pack two 64-bit components in a single message, so send 256861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * 2 messages if we have more components 256961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga */ 
257061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga unsigned num_iterations = 1; 257161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga unsigned iter_components = num_components; 257227e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri unsigned first_component = nir_intrinsic_component(instr); 257327e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri if (is_64bit) { 257427e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri first_component = first_component / 2; 257527e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri if (instr->num_components > 2) { 257627e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri num_iterations = 2; 257727e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri iter_components = 2; 257827e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri } 25797d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 25807d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 258161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* 64-bit data needs to me shuffled before we can write it to the URB. 258261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * We will use this temporary to shuffle the components in each 258361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * iteration. 
258461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga */ 258561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga fs_reg tmp = 258661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga fs_reg(VGRF, alloc.allocate(2 * iter_components), value.type); 25877d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 25882bda4b062f62edac1011bf65f410eeca176b5e23Timothy Arceri mask = mask << first_component; 25892bda4b062f62edac1011bf65f410eeca176b5e23Timothy Arceri 259061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga for (unsigned iter = 0; iter < num_iterations; iter++) { 259161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga if (!is_64bit && mask != WRITEMASK_XYZW) { 259261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga srcs[header_regs++] = brw_imm_ud(mask << 16); 259361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga opcode = indirect_offset.file != BAD_FILE ? 259461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT : 259561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga SHADER_OPCODE_URB_WRITE_SIMD8_MASKED; 259661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } else if (is_64bit && ((mask & WRITEMASK_XY) != WRITEMASK_XY)) { 259761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* Expand the 64-bit mask to 32-bit channels. We only handle 259861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * two channels in each iteration, so we only care about X/Y. 
259961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga */ 260061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga unsigned mask32 = 0; 260161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga if (mask & WRITEMASK_X) 260261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga mask32 |= WRITEMASK_XY; 260361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga if (mask & WRITEMASK_Y) 260461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga mask32 |= WRITEMASK_ZW; 260561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 260661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* If the mask does not include any of the channels X or Y there 260761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * is nothing to do in this iteration. Move on to the next couple 260861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * of 64-bit channels. 260961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga */ 261061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga if (!mask32) { 261161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga mask >>= 2; 261261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga imm_offset++; 261361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga continue; 261461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } 26157d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 261661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga srcs[header_regs++] = brw_imm_ud(mask32 << 16); 261761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga opcode = indirect_offset.file != BAD_FILE ? 
261861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT : 261961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga SHADER_OPCODE_URB_WRITE_SIMD8_MASKED; 262061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } else { 262161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga opcode = indirect_offset.file != BAD_FILE ? 262261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT : 262361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga SHADER_OPCODE_URB_WRITE_SIMD8; 262461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } 26257d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 262661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga for (unsigned i = 0; i < iter_components; i++) { 26272bda4b062f62edac1011bf65f410eeca176b5e23Timothy Arceri if (!(mask & (1 << (i + first_component)))) 262861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga continue; 262961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 263061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga if (!is_64bit) { 26312bda4b062f62edac1011bf65f410eeca176b5e23Timothy Arceri srcs[header_regs + i + first_component] = 26322bda4b062f62edac1011bf65f410eeca176b5e23Timothy Arceri offset(value, bld, BRW_GET_SWZ(swiz, i)); 263361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } else { 263461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* We need to shuffle the 64-bit data to match the layout 263561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * expected by our 32-bit URB write messages. We use a temporary 263661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * for that. 
263761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga */ 263861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga unsigned channel = BRW_GET_SWZ(swiz, iter * 2 + i); 263961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga shuffle_64bit_data_for_32bit_write(bld, 264061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga retype(offset(tmp, bld, 2 * i), BRW_REGISTER_TYPE_F), 264161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga retype(offset(value, bld, 2 * channel), BRW_REGISTER_TYPE_DF), 264261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 1); 264361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 264461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* Now copy the data to the destination */ 264561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga fs_reg dest = fs_reg(VGRF, alloc.allocate(2), value.type); 264661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga unsigned idx = 2 * i; 264761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga bld.MOV(dest, offset(tmp, bld, idx)); 264861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga bld.MOV(offset(dest, bld, 1), offset(tmp, bld, idx + 1)); 264927e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri srcs[header_regs + idx + first_component * 2] = dest; 265027e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri srcs[header_regs + idx + 1 + first_component * 2] = 265127e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri offset(dest, bld, 1); 265261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } 265361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } 265461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 265561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga unsigned mlen = 26562bda4b062f62edac1011bf65f410eeca176b5e23Timothy Arceri header_regs + (is_64bit ? 2 * iter_components : iter_components) + 265727e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri (is_64bit ? 
2 * first_component : first_component); 265861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga fs_reg payload = 265961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga bld.vgrf(BRW_REGISTER_TYPE_UD, mlen); 266061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga bld.LOAD_PAYLOAD(payload, srcs, mlen, header_regs); 266161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 266261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga fs_inst *inst = bld.emit(opcode, bld.null_reg_ud(), payload); 266361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga inst->offset = imm_offset; 266461197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga inst->mlen = mlen; 266561197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga 266661197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga /* If this is a 64-bit attribute, select the next two 64-bit channels 266761197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga * to be handled in the next iteration. 266861197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga */ 266961197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga if (is_64bit) { 267061197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga mask >>= 2; 267161197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga imm_offset++; 267261197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } 267361197b8d5dd963bd9288385308feb3f0dcaf6742Iago Toral Quiroga } 26747d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 26757d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 26767d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 26777d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke default: 26787d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke nir_emit_intrinsic(bld, instr); 26797d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke break; 26807d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke } 26817d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke} 
26827d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunke 26837d9143ad885752184156b3a0d3e492aef09af3b0Kenneth Graunkevoid 2684a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunkefs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, 2685a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke nir_intrinsic_instr *instr) 2686a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke{ 2687a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke assert(stage == MESA_SHADER_TESS_EVAL); 2688e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(prog_data); 2689a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 2690a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke fs_reg dest; 2691a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke if (nir_intrinsic_infos[instr->intrinsic].has_dest) 2692a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke dest = get_nir_dest(instr->dest); 2693a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 2694a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke switch (instr->intrinsic) { 2695a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke case nir_intrinsic_load_primitive_id: 2696a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke bld.MOV(dest, fs_reg(brw_vec1_grf(0, 1))); 2697a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke break; 2698a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke case nir_intrinsic_load_tess_coord: 2699a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke /* gl_TessCoord is part of the payload in g1-3 */ 2700a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke for (unsigned i = 0; i < 3; i++) { 2701a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke bld.MOV(offset(dest, bld, i), fs_reg(brw_vec8_grf(1 + i, 0))); 2702a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke } 2703a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke break; 2704a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 
2705a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke case nir_intrinsic_load_input: 2706a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke case nir_intrinsic_load_per_vertex_input: { 2707a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke fs_reg indirect_offset = get_indirect_offset(instr); 2708a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke unsigned imm_offset = instr->const_index[0]; 2709cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri unsigned first_component = nir_intrinsic_component(instr); 2710a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 271127e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri if (type_sz(dest.type) == 8) { 271227e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri first_component = first_component / 2; 271327e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri } 271427e28197e8e82e8c47fda5d6e912c5cb62c03f4aTimothy Arceri 2715a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke fs_inst *inst; 2716a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke if (indirect_offset.file == BAD_FILE) { 27174a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke /* Arbitrarily only push up to 32 vec4 slots worth of data, 27184a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke * which is 16 registers (since each holds 2 vec4 slots). 
27194a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke */ 27204a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke const unsigned max_push_slots = 32; 27214a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke if (imm_offset < max_push_slots) { 27224a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke fs_reg src = fs_reg(ATTR, imm_offset / 2, dest.type); 27234a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke for (int i = 0; i < instr->num_components; i++) { 2724cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri unsigned comp = 16 / type_sz(dest.type) * (imm_offset % 2) + 2725cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri i + first_component; 272658f304defe804a6f01b0b961997ecfe61fe00d34Iago Toral Quiroga bld.MOV(offset(dest, bld, i), component(src, comp)); 27274a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke } 27284a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke tes_prog_data->base.urb_read_length = 27294a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke MAX2(tes_prog_data->base.urb_read_length, 27304a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke DIV_ROUND_UP(imm_offset + 1, 2)); 27314a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke } else { 27324a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke /* Replicate the patch handle to all enabled channels */ 27334a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke const fs_reg srcs[] = { 27344a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD) 27354a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke }; 27364a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); 27374a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke bld.LOAD_PAYLOAD(patch_handle, srcs, ARRAY_SIZE(srcs), 0); 27384a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke 2739cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri if (first_component != 0) { 
2740cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri unsigned read_components = 2741cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri instr->num_components + first_component; 2742cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri fs_reg tmp = bld.vgrf(dest.type, read_components); 2743cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, 2744cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri patch_handle); 274569570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = read_components * REG_SIZE; 2746cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri for (unsigned i = 0; i < instr->num_components; i++) { 2747cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri bld.MOV(offset(dest, bld, i), 2748cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri offset(tmp, bld, i + first_component)); 2749cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri } 2750cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri } else { 2751cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, 2752cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri patch_handle); 275369570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = instr->num_components * REG_SIZE; 2754cfff71a47a655e8cf930e858d408dc4db942ec7cTimothy Arceri } 27554a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke inst->mlen = 1; 27564a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke inst->offset = imm_offset; 27574a1c8a3037cd29938b2a6e2c680c341e9903cfbeKenneth Graunke } 2758a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke } else { 2759a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke /* Indirect indexing - use per-slot offsets as well. 
*/ 2760a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 27610f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga /* We can only read two double components with each URB read, so 27620f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga * we send two read messages in that case, each one loading up to 27630f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga * two double components. 27640f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga */ 27650f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga unsigned num_iterations = 1; 27660f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga unsigned num_components = instr->num_components; 27670f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga fs_reg orig_dest = dest; 27680f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga if (type_sz(dest.type) == 8) { 27690f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga if (instr->num_components > 2) { 27700f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga num_iterations = 2; 27710f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga num_components = 2; 27720f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } 27730f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), dest.type); 27740f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga dest = tmp; 27750f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } 27760f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga 27770f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga for (unsigned iter = 0; iter < num_iterations; iter++) { 27780f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga const fs_reg srcs[] = { 27790f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), 27800f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga indirect_offset 27810f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga }; 
27820f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); 27830f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); 27840f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga 27850f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga if (first_component != 0) { 27860f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga unsigned read_components = 27870f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga num_components + first_component; 27880f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga fs_reg tmp = bld.vgrf(dest.type, read_components); 27890f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, 27900f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga payload); 27910f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga for (unsigned i = 0; i < num_components; i++) { 27920f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga bld.MOV(offset(dest, bld, i), 27930f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga offset(tmp, bld, i + first_component)); 27940f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } 27950f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } else { 27960f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, 27970f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga payload); 27980f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } 27990f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga inst->mlen = 2; 28000f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga inst->offset = imm_offset; 28012d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = (num_components + first_component) * 28022d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->dst.component_size(inst->exec_size); 
28030f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga 28040f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga /* If we are reading 64-bit data using 32-bit read messages we need 28050f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga * build proper 64-bit data elements by shuffling the low and high 28060f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga * 32-bit components around like we do for other things like UBOs 28070f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga * or SSBOs. 28080f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga */ 28090f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga if (type_sz(dest.type) == 8) { 28100f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga shuffle_32bit_load_result_to_64bit_data( 28110f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga bld, dest, retype(dest, BRW_REGISTER_TYPE_F), num_components); 28120f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga 28130f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga for (unsigned c = 0; c < num_components; c++) { 28140f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga bld.MOV(offset(orig_dest, bld, iter * 2 + c), 28150f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga offset(dest, bld, c)); 28160f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } 28170f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga } 28180f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga 28190f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga /* If we are loading double data and we need a second read message 28200f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga * adjust the offset 28210f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga */ 28220f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga if (num_iterations > 1) { 28230f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga num_components = instr->num_components - 2; 
28240f2516d88f6607b2816445c2dc18607cdaf1beffIago Toral Quiroga imm_offset++; 28252477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri } 28262477e6cfada55563631c654fce9250e4fe276f0eTimothy Arceri } 2827a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke } 2828a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke break; 2829a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke } 2830a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke default: 2831a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke nir_emit_intrinsic(bld, instr); 2832a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke break; 2833a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke } 2834a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke} 2835a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunke 2836a5038427c3624e559f954124d77304f9ae9b884cKenneth Graunkevoid 2837918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkefs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, 2838918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_intrinsic_instr *instr) 2839918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke{ 2840918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(stage == MESA_SHADER_GEOMETRY); 2841fc19a0d2e422ea8e45bc5440a91f858f5f345884Kenneth Graunke fs_reg indirect_offset; 2842918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2843918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg dest; 2844918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke if (nir_intrinsic_infos[instr->intrinsic].has_dest) 2845918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest = get_nir_dest(instr->dest); 2846918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2847918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke switch (instr->intrinsic) { 2848918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_primitive_id: 2849918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(stage == MESA_SHADER_GEOMETRY); 
2850e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke assert(brw_gs_prog_data(prog_data)->include_primitive_id); 2851918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), 2852918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD)); 2853918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2854918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2855918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_input: 2856918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke unreachable("load_input intrinsics are invalid for the GS stage"); 2857918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2858918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_per_vertex_input: 285978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand emit_gs_input_load(dest, instr->src[0], instr->const_index[0], 2860a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri instr->src[1], instr->num_components, 2861a102ef2d4fd01a946f949a45115d65abb6714a5bTimothy Arceri nir_intrinsic_component(instr)); 2862918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2863918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2864918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_emit_vertex_with_counter: 2865918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke emit_gs_vertex(instr->src[0], instr->const_index[0]); 2866918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2867918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2868918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_end_primitive_with_counter: 2869918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke emit_gs_end_primitive(instr->src[0]); 2870918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2871918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 
2872918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_set_vertex_count: 2873918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0])); 2874918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2875918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2876918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_invocation_id: { 2877918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg val = nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; 2878918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(val.file != BAD_FILE); 2879918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest.type = val.type; 2880918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(dest, val); 2881918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2882918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 2883918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2884918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke default: 2885918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_intrinsic(bld, instr); 2886918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 2887918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 2888918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke} 2889918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 2890af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez/** 2891af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * Fetch the current render target layer index. 
2892af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2893af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerezstatic fs_reg 2894af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerezfetch_render_target_array_index(const fs_builder &bld) 2895af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez{ 2896af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez if (bld.shader->devinfo->gen >= 6) { 2897af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* The render target array index is provided in the thread payload as 2898af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * bits 26:16 of r0.0. 2899af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2900af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const fs_reg idx = bld.vgrf(BRW_REGISTER_TYPE_UD); 2901af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez bld.AND(idx, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 1), 2902af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez brw_imm_uw(0x7ff)); 2903af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez return idx; 2904af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez } else { 2905af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* Pre-SNB we only ever render into the first layer of the framebuffer 2906af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * since layered rendering is not implemented. 
2907af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2908af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez return brw_imm_ud(0); 2909af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez } 2910af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez} 2911af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2912af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez/** 2913af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * Fake non-coherent framebuffer read implemented using TXF to fetch from the 2914af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * framebuffer at the current fragment coordinates and sample index. 2915af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2916af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerezfs_inst * 2917af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerezfs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, 2918af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez unsigned target) 2919af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez{ 2920979d0aca6277975986f5f278cad0f37616c9d91fJason Ekstrand const struct gen_device_info *devinfo = bld.shader->devinfo; 2921af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2922af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez assert(bld.shader->stage == MESA_SHADER_FRAGMENT); 2923af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const brw_wm_prog_key *wm_key = 2924af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez reinterpret_cast<const brw_wm_prog_key *>(key); 2925af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez assert(!wm_key->coherent_fb_fetch); 2926e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke const struct brw_wm_prog_data *wm_prog_data = 2927e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke brw_wm_prog_data(stage_prog_data); 2928af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2929af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* Calculate the surface 
index relative to the start of the texture binding 2930af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * table block, since that's what the texturing messages expect. 2931af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2932af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const unsigned surface = target + 2933af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez wm_prog_data->binding_table.render_target_read_start - 2934af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez wm_prog_data->base.binding_table.texture_start; 2935af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2936af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez brw_mark_surface_used( 2937af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez bld.shader->stage_prog_data, 2938af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez wm_prog_data->binding_table.render_target_read_start + target); 2939af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2940af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* Calculate the fragment coordinates. */ 2941af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3); 2942af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez bld.MOV(offset(coords, bld, 0), pixel_x); 2943af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez bld.MOV(offset(coords, bld, 1), pixel_y); 2944af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld)); 2945af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2946af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* Calculate the sample index and MCS payload when multisampling. 
Luckily 2947af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * the MCS fetch message behaves deterministically for UMS surfaces, so it 2948af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * shouldn't be necessary to recompile based on whether the framebuffer is 2949af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * CMS or UMS. 2950af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2951af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez if (wm_key->multisample_fbo && 2952af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE) 2953af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = *emit_sampleid_setup(); 2954af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2955af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID]; 2956af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const fs_reg mcs = wm_key->multisample_fbo ? 2957af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg(); 2958af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2959af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* Use either a normal or a CMS texel fetch message depending on whether 2960af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * the framebuffer is single or multisample. On SKL+ use the wide CMS 2961af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * message just in case the framebuffer uses 16x multisampling, it should 2962af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez * be equivalent to the normal CMS fetch for lower multisampling modes. 2963af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez */ 2964af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const opcode op = !wm_key->multisample_fbo ? 
SHADER_OPCODE_TXF_LOGICAL : 2965af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W_LOGICAL : 2966af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez SHADER_OPCODE_TXF_CMS_LOGICAL; 2967af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2968af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez /* Emit the instruction. */ 2969af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(), 2970af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez sample, mcs, 2971af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez brw_imm_ud(surface), brw_imm_ud(0), 2972af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez fs_reg(), brw_imm_ud(3), brw_imm_ud(0) }; 2973af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS); 2974af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2975af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs)); 297669570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = 4 * inst->dst.component_size(inst->exec_size); 2977af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 2978af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez return inst; 2979af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez} 2980af0cc743e607293146861518bb6ef96f411aeca9Francisco Jerez 29814135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez/** 29824135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez * Actual coherent framebuffer read implemented using the native render target 29834135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez * read message. Requires SKL+. 
29844135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez */ 29854135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerezstatic fs_inst * 29864135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerezemit_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, unsigned target) 29874135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez{ 29884135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez assert(bld.shader->devinfo->gen >= 9); 29894135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez fs_inst *inst = bld.emit(FS_OPCODE_FB_READ_LOGICAL, dst); 29904135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez inst->target = target; 299169570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = 4 * inst->dst.component_size(inst->exec_size); 29924135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez 29934135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez return inst; 29944135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez} 29954135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez 29967dac8820730777756c00d7024330517848dc3b9fFrancisco Jerezstatic fs_reg 2997b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerezalloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n) 2998b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez{ 2999b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez if (n && regs[0].file != BAD_FILE) { 3000b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return regs[0]; 3001b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez 3002b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez } else { 3003b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, size); 3004b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez 3005b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez for (unsigned i = 0; i < n; i++) 3006b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez regs[i] = tmp; 3007b00a236d6a6212323f77248ba923c65eeb02592bFrancisco 
Jerez 3008b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return tmp; 3009b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez } 3010b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez} 3011b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez 3012b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerezstatic fs_reg 3013b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerezalloc_frag_output(fs_visitor *v, unsigned location) 30147dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez{ 30157dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez assert(v->stage == MESA_SHADER_FRAGMENT); 30167dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez const brw_wm_prog_key *const key = 30177dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez reinterpret_cast<const brw_wm_prog_key *>(v->key); 30187dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION); 30197dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX); 30207dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30217dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1)) 3022b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return alloc_temporary(v->bld, 4, &v->dual_src_output, 1); 30237dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30247dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez else if (l == FRAG_RESULT_COLOR) 3025b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return alloc_temporary(v->bld, 4, v->outputs, 3026b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez MAX2(key->nr_color_regions, 1)); 30277dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30287dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez else if (l == FRAG_RESULT_DEPTH) 3029b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return alloc_temporary(v->bld, 1, &v->frag_depth, 1); 
30307dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30317dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez else if (l == FRAG_RESULT_STENCIL) 3032b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return alloc_temporary(v->bld, 1, &v->frag_stencil, 1); 30337dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30347dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez else if (l == FRAG_RESULT_SAMPLE_MASK) 3035b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return alloc_temporary(v->bld, 1, &v->sample_mask, 1); 30367dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30377dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez else if (l >= FRAG_RESULT_DATA0 && 30387dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS) 3039b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez return alloc_temporary(v->bld, 4, 3040b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez &v->outputs[l - FRAG_RESULT_DATA0], 1); 30417dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 30427dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez else 30437dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez unreachable("Invalid location"); 30447dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez} 30457dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez 3046918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkevoid 3047918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkefs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, 3048918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_intrinsic_instr *instr) 3049918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke{ 3050918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(stage == MESA_SHADER_FRAGMENT); 3051918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3052918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg dest; 3053918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke if 
(nir_intrinsic_infos[instr->intrinsic].has_dest) 3054918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest = get_nir_dest(instr->dest); 3055e257a5112476c47928b2fa2a2f2ea3108d13264bJason Ekstrand 30562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott switch (instr->intrinsic) { 3057918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_front_face: 3058918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), 3059918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke *emit_frontfacing_interpolation()); 3060918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3061918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3062918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_sample_pos: { 3063918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; 3064918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(sample_pos.file != BAD_FILE); 3065918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest.type = sample_pos.type; 3066918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(dest, sample_pos); 3067918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1)); 3068918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3069918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3070918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 30712e311e421122e0232987fdca3645c6bd39fe2470Jason Ekstrand case nir_intrinsic_load_layer_id: 30722e311e421122e0232987fdca3645c6bd39fe2470Jason Ekstrand dest.type = BRW_REGISTER_TYPE_UD; 30732e311e421122e0232987fdca3645c6bd39fe2470Jason Ekstrand bld.MOV(dest, fetch_render_target_array_index(bld)); 30742e311e421122e0232987fdca3645c6bd39fe2470Jason Ekstrand break; 30752e311e421122e0232987fdca3645c6bd39fe2470Jason Ekstrand 3076c875e3cdd21811ad6669160d59fa39a4526ef872Matt Turner case 
nir_intrinsic_load_helper_invocation: 3077918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_sample_mask_in: 3078918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_sample_id: { 3079918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); 3080918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg val = nir_system_values[sv]; 3081918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(val.file != BAD_FILE); 3082918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest.type = val.type; 3083918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(dest, val); 3084918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3085918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3086918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3087f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez case nir_intrinsic_store_output: { 3088f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez const fs_reg src = get_nir_src(instr->src[0]); 30897dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); 3090f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez assert(const_offset && "Indirect output stores not allowed"); 30917dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez const unsigned location = nir_intrinsic_base(instr) + 30927dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION); 3093b00a236d6a6212323f77248ba923c65eeb02592bFrancisco Jerez const fs_reg new_dest = retype(alloc_frag_output(this, location), 30947dac8820730777756c00d7024330517848dc3b9fFrancisco Jerez src.type); 3095f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez 3096f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez for (unsigned j = 0; j < instr->num_components; j++) 
3097f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j), 3098f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez offset(src, bld, j)); 3099f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez 3100f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez break; 3101f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez } 3102f3cb2c34f29d35088879a6b8101c3ac648e0febfFrancisco Jerez 3103f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez case nir_intrinsic_load_output: { 3104f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez const unsigned l = GET_FIELD(nir_intrinsic_base(instr), 3105f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez BRW_NIR_FRAG_OUTPUT_LOCATION); 3106f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez assert(l >= FRAG_RESULT_DATA0); 3107f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 3108f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez assert(const_offset && "Indirect output loads not allowed"); 3109f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez const unsigned target = l - FRAG_RESULT_DATA0 + const_offset->u32[0]; 3110f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez const fs_reg tmp = bld.vgrf(dest.type, 4); 3111f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez 31124135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez if (reinterpret_cast<const brw_wm_prog_key *>(key)->coherent_fb_fetch) 31134135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez emit_coherent_fb_read(bld, tmp, target); 31144135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez else 31154135fc22ff735a40c36fcf051c1735fe23d154f2Francisco Jerez emit_non_coherent_fb_read(bld, tmp, target); 3116f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez 3117f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez for (unsigned j = 0; j < instr->num_components; j++) { 
3118f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez bld.MOV(offset(dest, bld, j), 3119f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez offset(tmp, bld, nir_intrinsic_component(instr) + j)); 3120f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez } 3121f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez 3122f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez break; 3123f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez } 3124f24e393bd5caee85994b00b93f141e6c4b99e273Francisco Jerez 31258eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke case nir_intrinsic_discard: 31268eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke case nir_intrinsic_discard_if: { 31272faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott /* We track our discarded pixels in f0.1. By predicating on it, we can 31288eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke * update just the flag bits that aren't yet discarded. If there's no 31298eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke * condition, we emit a CMP of g0 != g0, so all currently executing 31308eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke * channels will get turned off. 
31312faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott */ 31328eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke fs_inst *cmp; 31338eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke if (instr->intrinsic == nir_intrinsic_discard_if) { 3134979fe2ffee3956186017fe6c115aed53fc87ad3dFrancisco Jerez cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), 31353ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_d(0), BRW_CONDITIONAL_Z); 31368eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke } else { 31378eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), 31388eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke BRW_REGISTER_TYPE_UW)); 3139979fe2ffee3956186017fe6c115aed53fc87ad3dFrancisco Jerez cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ); 31408eb6c109994de2827b0a1340a2dc8d933edaf5e0Kenneth Graunke } 31412faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott cmp->predicate = BRW_PREDICATE_NORMAL; 31422faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott cmp->flag_subreg = 1; 31432faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 314428e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 6) { 31458a0946f3b1522e5f91afe14c8c3b22ba6009ed04Kenneth Graunke emit_discard_jump(); 31462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 31472faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 31482faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 31492faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 31501eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case nir_intrinsic_load_input: { 31511eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* load_input is only used for flat inputs */ 31521eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke unsigned base = nir_intrinsic_base(instr); 31531eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke unsigned component = nir_intrinsic_component(instr); 
31541eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke unsigned num_components = instr->num_components; 31551eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke enum brw_reg_type type = dest.type; 3156854c4d8b37416d3e5593099a8e5441f3cf861173Francisco Jerez 31571eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Special case fields in the VUE header */ 31581eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (base == VARYING_SLOT_LAYER) 31591eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke component = 1; 31601eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke else if (base == VARYING_SLOT_VIEWPORT) 31611eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke component = 2; 3162854c4d8b37416d3e5593099a8e5441f3cf861173Francisco Jerez 31631eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (nir_dest_bit_size(instr->dest) == 64) { 31641eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* const_index is in 32-bit type size units that could not be aligned 31651eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * with DF. We need to read the double vector as if it was a float 31661eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * vector of twice the number of components to fetch the right data. 
31671eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke */ 31681eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke type = BRW_REGISTER_TYPE_F; 31691eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke num_components *= 2; 31701eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 3171d8c8f4203f8bb18152af0d0c120f3582a93c07c2Kenneth Graunke 31721eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke for (unsigned int i = 0; i < num_components; i++) { 31731eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke struct brw_reg interp = interp_reg(base, component + i); 31741eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke interp = suboffset(interp, 3); 31751eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.emit(FS_OPCODE_CINTERP, offset(retype(dest, type), bld, i), 31761eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke retype(fs_reg(interp), type)); 31771eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 3178854c4d8b37416d3e5593099a8e5441f3cf861173Francisco Jerez 31791eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (nir_dest_bit_size(instr->dest) == 64) { 31801eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke shuffle_32bit_load_result_to_64bit_data(bld, 31811eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest, 31821eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke retype(dest, type), 31831eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke instr->num_components); 31841eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 31851eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke break; 31861eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 31871eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 31881eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case nir_intrinsic_load_barycentric_pixel: 31891eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case nir_intrinsic_load_barycentric_centroid: 31901eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case 
nir_intrinsic_load_barycentric_sample: 31911eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Do nothing - load_interpolated_input handling will handle it later. */ 31921eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke break; 31931eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 31941eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case nir_intrinsic_load_barycentric_at_sample: { 31951eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const glsl_interp_mode interpolation = 31961eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); 31973af2623da5167aa686bcb2cff01d27058a507026Francisco Jerez 31981eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); 31991eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 32001eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (const_sample) { 32011eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke unsigned msg_data = const_sample->i32[0] << 4; 32021eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 32031eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke emit_pixel_interpolater_send(bld, 32041eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke FS_OPCODE_INTERPOLATE_AT_SAMPLE, 32051eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest, 32061eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg(), /* src */ 32071eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke brw_imm_ud(msg_data), 32081eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke interpolation); 32091eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } else { 32101eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const fs_reg sample_src = retype(get_nir_src(instr->src[0]), 32111eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke BRW_REGISTER_TYPE_UD); 32121eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 32131eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth 
Graunke if (nir_src_is_dynamically_uniform(instr->src[0])) { 32141eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const fs_reg sample_id = bld.emit_uniformize(sample_src); 32151eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const fs_reg msg_data = vgrf(glsl_type::uint_type); 32161eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.exec_all().group(1, 0) 32171eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke .SHL(msg_data, sample_id, brw_imm_ud(4u)); 3218918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke emit_pixel_interpolater_send(bld, 3219918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke FS_OPCODE_INTERPOLATE_AT_SAMPLE, 32201eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest, 3221918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg(), /* src */ 32221eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke msg_data, 3223918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke interpolation); 3224918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } else { 32251eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Make a loop that sends a message to the pixel interpolater 32261eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * for the sample number in each live channel. If there are 32271eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * multiple channels with the same sample number then these 32281eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * will be handled simultaneously with a single interation of 32291eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * the loop. 
32301eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke */ 32311eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.emit(BRW_OPCODE_DO); 32321eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 32331eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Get the next live sample number into sample_id_reg */ 32341eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const fs_reg sample_id = bld.emit_uniformize(sample_src); 32351eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 32361eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Set the flag register so that we can perform the send 32371eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * message on all channels that have the same sample number 32381eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke */ 32391eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.CMP(bld.null_reg_ud(), 32401eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke sample_src, sample_id, 32411eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke BRW_CONDITIONAL_EQ); 32421eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const fs_reg msg_data = vgrf(glsl_type::uint_type); 32431eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.exec_all().group(1, 0) 32441eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke .SHL(msg_data, sample_id, brw_imm_ud(4u)); 32451eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_inst *inst = 3246918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke emit_pixel_interpolater_send(bld, 3247918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke FS_OPCODE_INTERPOLATE_AT_SAMPLE, 32481eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest, 3249918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg(), /* src */ 3250918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke msg_data, 3251918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke interpolation); 32521eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 
set_predicate(BRW_PREDICATE_NORMAL, inst); 3253a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 32541eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Continue the loop if there are any live channels left */ 32551eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke set_predicate_inv(BRW_PREDICATE_NORMAL, 32561eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke true, /* inverse */ 32571eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.emit(BRW_OPCODE_WHILE)); 3258918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3259918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 32601eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke break; 32611eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 3262a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 32631eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case nir_intrinsic_load_barycentric_at_offset: { 32641eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const glsl_interp_mode interpolation = 32651eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); 3266a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 32671eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 3268a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 32691eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (const_offset) { 32701eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; 32711eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; 3272918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 32731eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke emit_pixel_interpolater_send(bld, 32741eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, 
32751eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest, 32761eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg(), /* src */ 32771eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke brw_imm_ud(off_x | (off_y << 4)), 32781eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke interpolation); 32791eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } else { 32801eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg src = vgrf(glsl_type::ivec2_type); 32811eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg offset_src = retype(get_nir_src(instr->src[0]), 32821eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke BRW_REGISTER_TYPE_F); 32831eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke for (int i = 0; i < 2; i++) { 32841eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg temp = vgrf(glsl_type::float_type); 32851eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f)); 32861eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg itemp = vgrf(glsl_type::int_type); 32871eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* float to int */ 32881eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.MOV(itemp, temp); 32891eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 32901eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Clamp the upper end of the range to +7/16. 32911eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * ARB_gpu_shader5 requires that we support a maximum offset 32921eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * of +0.5, which isn't representable in a S0.4 value -- if 32931eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * we didn't clamp it, we'd end up with -8/16, which is the 32941eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * opposite of what the shader author wanted. 
32951eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * 32961eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * This is legal due to ARB_gpu_shader5's quantization 32971eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * rules: 32981eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * 32991eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * "Not all values of <offset> may be supported; x and y 33001eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * offsets may be rounded to fixed-point values with the 33011eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * number of fraction bits given by the 33021eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * implementation-dependent constant 33031eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke * FRAGMENT_INTERPOLATION_OFFSET_BITS" 33041eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke */ 33051eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke set_condmod(BRW_CONDITIONAL_L, 33061eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7))); 3307918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 33081eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 33091eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; 33101eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke emit_pixel_interpolater_send(bld, 33111eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke opcode, 33121eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest, 33131eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke src, 33141eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke brw_imm_ud(0u), 33151eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke interpolation); 33161eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 33171eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke break; 33181eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 
33191eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 33201eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke case nir_intrinsic_load_interpolated_input: { 33211eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (nir_intrinsic_base(instr) == VARYING_SLOT_POS) { 33221eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke emit_fragcoord_interpolation(dest); 3323918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3324918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3325918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 33261eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke assert(instr->src[0].ssa && 33271eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke instr->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic); 33281eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke nir_intrinsic_instr *bary_intrinsic = 33291eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); 33301eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke nir_intrinsic_op bary_intrin = bary_intrinsic->intrinsic; 33311eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke enum glsl_interp_mode interp_mode = 33321eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke (enum glsl_interp_mode) nir_intrinsic_interp_mode(bary_intrinsic); 33331eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg dst_xy; 33341eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 33351eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke if (bary_intrin == nir_intrinsic_load_barycentric_at_offset || 33361eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bary_intrin == nir_intrinsic_load_barycentric_at_sample) { 33371eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Use the result of the PI message */ 33381eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); 33391eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 
else { 33401eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke /* Use the delta_xy values computed from the payload */ 33411eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke enum brw_barycentric_mode bary = 33421eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke brw_barycentric_mode(interp_mode, bary_intrin); 33431eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 33441eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dst_xy = this->delta_xy[bary]; 3345918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3346918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 33471eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke for (unsigned int i = 0; i < instr->num_components; i++) { 33481eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg interp = 33491eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg(interp_reg(nir_intrinsic_base(instr), 33501eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke nir_intrinsic_component(instr) + i)); 33511eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke interp.type = BRW_REGISTER_TYPE_F; 33521eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke dest.type = BRW_REGISTER_TYPE_F; 3353918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3354875341c69b99dea7942a68c9060aa31a459e93fcKenneth Graunke if (devinfo->gen < 6 && interp_mode == INTERP_MODE_SMOOTH) { 33551eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke fs_reg tmp = vgrf(glsl_type::float_type); 33561eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.emit(FS_OPCODE_LINTERP, tmp, dst_xy, interp); 33571eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.MUL(offset(dest, bld, i), tmp, this->pixel_w); 33581eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } else { 33591eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke bld.emit(FS_OPCODE_LINTERP, offset(dest, bld, i), dst_xy, interp); 33601eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke } 3361918bda23dda36004c95f6441328ecc892e068886Kenneth 
Graunke } 3362918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3363918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 33641eef0b73aa323d94d5a080cd1efa81ccacdbd0d2Kenneth Graunke 3365918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke default: 3366918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_intrinsic(bld, instr); 3367918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3368918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3369918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke} 3370918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3371918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkevoid 3372918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkefs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, 3373918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_intrinsic_instr *instr) 3374918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke{ 3375918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(stage == MESA_SHADER_COMPUTE); 3376e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data); 3377918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3378918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg dest; 3379918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke if (nir_intrinsic_infos[instr->intrinsic].has_dest) 3380918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest = get_nir_dest(instr->dest); 3381918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3382918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke switch (instr->intrinsic) { 3383918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_barrier: 3384918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke emit_barrier(); 3385918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke cs_prog_data->uses_barrier = true; 3386918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 
3387918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3388918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_local_invocation_id: 3389918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_work_group_id: { 3390918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); 3391918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg val = nir_system_values[sv]; 3392918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke assert(val.file != BAD_FILE); 3393918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest.type = val.type; 3394918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke for (unsigned i = 0; i < 3; i++) 3395918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(offset(dest, bld, i), offset(val, bld, i)); 3396918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3397918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3398918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3399918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_load_num_work_groups: { 3400918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const unsigned surface = 3401918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke cs_prog_data->binding_table.work_groups_start; 3402918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3403918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke cs_prog_data->uses_num_work_groups = true; 3404918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 34053ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner fs_reg surf_index = brw_imm_ud(surface); 3406918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke brw_mark_surface_used(prog_data, surface); 3407918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3408918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Read the 3 GLuint components of gl_NumWorkGroups */ 
3409918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke for (unsigned i = 0; i < 3; i++) { 3410918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg read_result = 3411918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke emit_untyped_read(bld, surf_index, 34123ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_ud(i << 2), 3413918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 1 /* dims */, 1 /* size */, 3414918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke BRW_PREDICATE_NONE); 3415918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke read_result.type = dest.type; 3416918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(dest, read_result); 3417918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest = offset(dest, bld, 1); 3418918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3419918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3420918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3421918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3422e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_add: 3423e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_ADD, instr); 3424e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3425e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_imin: 3426e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_IMIN, instr); 3427e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3428e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_umin: 3429e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_UMIN, instr); 3430e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3431e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_imax: 3432e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen 
nir_emit_shared_atomic(bld, BRW_AOP_IMAX, instr); 3433e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3434e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_umax: 3435e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_UMAX, instr); 3436e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3437e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_and: 3438e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_AND, instr); 3439e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3440e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_or: 3441e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_OR, instr); 3442e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3443e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_xor: 3444e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_XOR, instr); 3445e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3446e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_exchange: 3447e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_MOV, instr); 3448e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3449e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen case nir_intrinsic_shared_atomic_comp_swap: 3450e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); 3451e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen break; 3452e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen 34535ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand case nir_intrinsic_load_shared: { 34545ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand assert(devinfo->gen >= 7); 
34555ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34565ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); 34575ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34585ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Get the offset to read from */ 34595ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand fs_reg offset_reg; 34605ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 34615ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand if (const_offset) { 34625ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); 34635ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } else { 34645ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand offset_reg = vgrf(glsl_type::uint_type); 34655ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand bld.ADD(offset_reg, 34665ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), 34675ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand brw_imm_ud(instr->const_index[0])); 34685ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } 34695ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34705ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Read the vector */ 34718aa01ac596fc0722058e10808c8141533c3fd1feIago Toral Quiroga do_untyped_vector_read(bld, dest, surf_index, offset_reg, 34728aa01ac596fc0722058e10808c8141533c3fd1feIago Toral Quiroga instr->num_components); 34735ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand break; 34745ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } 34755ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34765ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand case nir_intrinsic_store_shared: { 34775ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand assert(devinfo->gen >= 7); 
34785ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34795ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Block index */ 34805ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); 34815ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34825ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Value */ 34835ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand fs_reg val_reg = get_nir_src(instr->src[0]); 34845ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34855ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Writemask */ 34865ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand unsigned writemask = instr->const_index[1]; 34875ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 34888c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga /* get_nir_src() retypes to integer. Be wary of 64-bit types though 34898c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga * since the untyped writes below operate in units of 32-bits, which 34908c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga * means that we need to write twice as many components each time. 34918c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga * Also, we have to suffle 64-bit data to be in the appropriate layout 34928c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga * expected by our 32-bit write messages. 34938c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga */ 34948c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga unsigned type_size = 4; 34958c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga unsigned bit_size = instr->src[0].is_ssa ? 
34968c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size; 34978c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga if (bit_size == 64) { 34988c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga type_size = 8; 34998c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga fs_reg tmp = 35008c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga fs_reg(VGRF, alloc.allocate(alloc.sizes[val_reg.nr]), val_reg.type); 35018c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga shuffle_64bit_data_for_32bit_write( 35028c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga bld, 35038c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga retype(tmp, BRW_REGISTER_TYPE_F), 35048c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga retype(val_reg, BRW_REGISTER_TYPE_DF), 35058c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga instr->num_components); 35068c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga val_reg = tmp; 35078c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga } 35088c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga 35098c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga unsigned type_slots = type_size / 4; 35108c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga 35115ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Combine groups of consecutive enabled channels in one write 35125ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand * message. We use ffs to find the first enabled channel and then ffs on 35135ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand * the bit-inverse, down-shifted writemask to determine the length of 35145ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand * the block of enabled bits. 
35155ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand */ 35165ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand while (writemask) { 35175ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand unsigned first_component = ffs(writemask) - 1; 35185ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand unsigned length = ffs(~(writemask >> first_component)) - 1; 35195ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 35208c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga /* We can't write more than 2 64-bit components at once. Limit the 35218c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga * length of the write to what we can do and let the next iteration 35228c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga * handle the rest 35238c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga */ 35248c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga if (type_size > 4) 35258c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga length = MIN2(2, length); 35268c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga 35278c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga fs_reg offset_reg; 35285ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); 35295ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand if (const_offset) { 35305ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] + 35318c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga type_size * first_component); 35325ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } else { 35335ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand offset_reg = vgrf(glsl_type::uint_type); 35345ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand bld.ADD(offset_reg, 35355ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), 35368c6d147373cbdefef5945b00626bb62bb03198caIago Toral 
Quiroga brw_imm_ud(instr->const_index[0] + type_size * first_component)); 35375ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } 35385ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 35395ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand emit_untyped_write(bld, surf_index, offset_reg, 35408c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga offset(val_reg, bld, first_component * type_slots), 35418c6d147373cbdefef5945b00626bb62bb03198caIago Toral Quiroga 1 /* dims */, length * type_slots, 35425ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand BRW_PREDICATE_NONE); 35435ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 35445ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand /* Clear the bits in the writemask that we just wrote, then try 35455ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand * again to see if more channels are left. 35465ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand */ 35475ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand writemask &= (15 << (first_component + length)); 35485ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } 35495ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 35505ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand break; 35515ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand } 35525ea3647f89abccea5496824815b5b729f38f7a23Jason Ekstrand 3553918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke default: 3554918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke nir_emit_intrinsic(bld, instr); 3555918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3556918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3557918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke} 3558918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3559918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkevoid 3560918bda23dda36004c95f6441328ecc892e068886Kenneth Graunkefs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) 
3561918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke{ 3562918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg dest; 3563918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke if (nir_intrinsic_infos[instr->intrinsic].has_dest) 3564918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke dest = get_nir_dest(instr->dest); 3565918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3566918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke switch (instr->intrinsic) { 3567918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_atomic_counter_inc: 3568918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_atomic_counter_dec: 356940dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_read: 357040dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_add: 357140dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_min: 357240dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_max: 357340dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_and: 357440dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_or: 357540dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_xor: 357640dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_exchange: 357740dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick case nir_intrinsic_atomic_counter_comp_swap: { 35783fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand if (stage == MESA_SHADER_FRAGMENT && 35793fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand instr->intrinsic != nir_intrinsic_atomic_counter_read) 3580e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke brw_wm_prog_data(prog_data)->has_side_effects = true; 35813fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand 358240dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian 
Romanick /* Get some metadata from the image intrinsic. */ 358340dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; 358440dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick 3585918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Get the arguments of the atomic intrinsic. */ 3586918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const fs_reg offset = get_nir_src(instr->src[0]); 3587918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const unsigned surface = (stage_prog_data->binding_table.abo_start + 3588918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke instr->const_index[0]); 358940dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick const fs_reg src0 = (info->num_srcs >= 2 359040dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick ? get_nir_src(instr->src[1]) : fs_reg()); 359140dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick const fs_reg src1 = (info->num_srcs >= 3 359240dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick ? get_nir_src(instr->src[2]) : fs_reg()); 3593918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg tmp; 3594918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 359540dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick assert(info->num_srcs <= 3); 359640dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick 3597918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Emit a surface read or atomic op. 
*/ 35983d2011cb33317b0fe9b8fe989916efc1841c6ce0Ian Romanick if (instr->intrinsic == nir_intrinsic_atomic_counter_read) { 35993ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner tmp = emit_untyped_read(bld, brw_imm_ud(surface), offset, 1, 1); 36003d2011cb33317b0fe9b8fe989916efc1841c6ce0Ian Romanick } else { 360140dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, src0, 360240dd45d0c6aa4a9d727c09225967e9c3b1f45854Ian Romanick src1, 1, 1, 36033d2011cb33317b0fe9b8fe989916efc1841c6ce0Ian Romanick get_atomic_counter_op(instr->intrinsic)); 3604918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3605918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3606918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Assign the result. */ 3607918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp); 3608918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3609918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Mark the surface as used. 
*/ 3610918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke brw_mark_surface_used(stage_prog_data, surface); 3611918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke break; 3612918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke } 3613918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3614918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_load: 3615918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_store: 3616918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_add: 3617918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_min: 3618918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_max: 3619918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_and: 3620918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_or: 3621918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_xor: 3622918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_exchange: 3623918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke case nir_intrinsic_image_atomic_comp_swap: { 3624918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke using namespace image_access; 3625918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 36263fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand if (stage == MESA_SHADER_FRAGMENT && 36273fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand instr->intrinsic != nir_intrinsic_image_load) 3628e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke brw_wm_prog_data(prog_data)->has_side_effects = true; 36293fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand 3630918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Get the referenced image variable and type. 
*/ 3631918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const nir_variable *var = instr->variables[0]->var; 3632918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const glsl_type *type = var->type->without_array(); 3633918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const brw_reg_type base_type = get_image_base_type(type); 3634918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3635918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Get some metadata from the image intrinsic. */ 3636918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; 3637918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const unsigned arr_dims = type->sampler_array ? 1 : 0; 3638918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const unsigned surf_dims = type->coordinate_components() - arr_dims; 3639f310c02b94fba0a0a5ea7f5573f906de823cc5feJason Ekstrand const unsigned format = var->data.image.format; 3640918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3641918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Get the arguments of the image intrinsic. */ 3642918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const fs_reg image = get_nir_image_deref(instr->variables[0]); 3643918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const fs_reg addr = retype(get_nir_src(instr->src[0]), 3644918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke BRW_REGISTER_TYPE_UD); 3645918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const fs_reg src0 = (info->num_srcs >= 3 ? 3646918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke retype(get_nir_src(instr->src[2]), base_type) : 3647918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg()); 3648918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke const fs_reg src1 = (info->num_srcs >= 4 ? 
3649918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke retype(get_nir_src(instr->src[3]), base_type) : 3650918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg()); 3651918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke fs_reg tmp; 3652918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3653918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke /* Emit an image load, store or atomic op. */ 3654918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke if (instr->intrinsic == nir_intrinsic_image_load) 3655918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format); 3656918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3657918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke else if (instr->intrinsic == nir_intrinsic_image_store) 3658f310c02b94fba0a0a5ea7f5573f906de823cc5feJason Ekstrand emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, 3659f310c02b94fba0a0a5ea7f5573f906de823cc5feJason Ekstrand var->data.image.write_only ? GL_NONE : format); 3660918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke 3661918bda23dda36004c95f6441328ecc892e068886Kenneth Graunke else 3662a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez tmp = emit_image_atomic(bld, image, addr, src0, src1, 3663a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez surf_dims, arr_dims, info->dest_components, 3664a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez get_image_atomic_op(instr->intrinsic, type)); 3665a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 3666a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez /* Assign the result. 
*/ 3667a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez for (unsigned c = 0; c < info->dest_components; ++c) 3668a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez bld.MOV(offset(retype(dest, base_type), bld, c), 3669a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez offset(tmp, bld, c)); 3670a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez break; 3671a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez } 3672a47ae8de2cf30fbe45318a18a2ea032f30ab7d10Francisco Jerez 3673faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen case nir_intrinsic_memory_barrier_atomic_counter: 3674faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen case nir_intrinsic_memory_barrier_buffer: 3675faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen case nir_intrinsic_memory_barrier_image: 36767cb60d770fc24bf00b6f7e5898cca1426e55c026Francisco Jerez case nir_intrinsic_memory_barrier: { 3677a55452530f7525e9cf5d2619bef66a61b488b4afFrancisco Jerez const fs_builder ubld = bld.group(8, 0); 3678a55452530f7525e9cf5d2619bef66a61b488b4afFrancisco Jerez const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); 3679a55452530f7525e9cf5d2619bef66a61b488b4afFrancisco Jerez ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) 368069570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez ->size_written = 2 * REG_SIZE; 36817cb60d770fc24bf00b6f7e5898cca1426e55c026Francisco Jerez break; 36827cb60d770fc24bf00b6f7e5898cca1426e55c026Francisco Jerez } 36837cb60d770fc24bf00b6f7e5898cca1426e55c026Francisco Jerez 3684faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen case nir_intrinsic_group_memory_barrier: 3685faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen case nir_intrinsic_memory_barrier_shared: 368651694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez /* We treat these workgroup-level barriers as no-ops. 
This should be 368751694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * safe at present and as long as: 368851694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * 368951694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * - Memory access instructions are not subsequently reordered by the 369051694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * compiler back-end. 369151694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * 369251694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * - All threads from a given compute shader workgroup fit within a 369351694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * single subslice and therefore talk to the same HDC shared unit 369451694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * what supposedly guarantees ordering and coherency between threads 369551694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * from the same workgroup. This may change in the future when we 369651694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * start splitting workgroups across multiple subslices. 369751694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * 369851694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * - The context is not in fault-and-stream mode, which could cause 369951694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * memory transactions (including to SLM) prior to the barrier to be 370051694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * replayed after the barrier if a pagefault occurs. This shouldn't 370151694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * be a problem up to and including SKL because fault-and-stream is 370251694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * not usable due to hardware issues, but that's likely to change in 370351694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez * the future. 
370451694072218b5ae84b5d8f98ee2172d7c5d61b31Francisco Jerez */ 3705faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen break; 3706faa119307035787f5e421dd6a9eb4d0101de963bJordan Justen 37074379ca22f18f5731248ee794ab651db721ba38b2Emil Velikov case nir_intrinsic_shader_clock: { 37084379ca22f18f5731248ee794ab651db721ba38b2Emil Velikov /* We cannot do anything if there is an event, so ignore it for now */ 370980e1d670b4b4c080ce2092a3b52d2415bc4c6a42Francisco Jerez const fs_reg shader_clock = get_timestamp(bld); 371080e1d670b4b4c080ce2092a3b52d2415bc4c6a42Francisco Jerez const fs_reg srcs[] = { component(shader_clock, 0), 371180e1d670b4b4c080ce2092a3b52d2415bc4c6a42Francisco Jerez component(shader_clock, 1) }; 37124379ca22f18f5731248ee794ab651db721ba38b2Emil Velikov bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0); 37134379ca22f18f5731248ee794ab651db721ba38b2Emil Velikov break; 37144379ca22f18f5731248ee794ab651db721ba38b2Emil Velikov } 37154379ca22f18f5731248ee794ab651db721ba38b2Emil Velikov 371650db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres case nir_intrinsic_image_size: { 371750db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres /* Get the referenced image variable and type. */ 371850db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const nir_variable *var = instr->variables[0]->var; 371950db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const glsl_type *type = var->type->without_array(); 372050db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres 372150db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres /* Get the size of the image. */ 372250db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const fs_reg image = get_nir_image_deref(instr->variables[0]); 372350db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET); 372450db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres 372550db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres /* For 1DArray image types, the array index is stored in the Z component. 
372650db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres * Fix this by swizzling the Z component to the Y component. 372750db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres */ 372850db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const bool is_1d_array_image = 372950db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres type->sampler_dimensionality == GLSL_SAMPLER_DIM_1D && 373050db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres type->sampler_array; 373150db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres 373250db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres /* For CubeArray images, we should count the number of cubes instead 373350db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres * of the number of faces. Fix it by dividing the (Z component) by 6. 373450db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres */ 373550db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const bool is_cube_array_image = 373650db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && 373750db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres type->sampler_array; 373850db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres 373950db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres /* Copy all the components. 
*/ 374050db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; 374150db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres for (unsigned c = 0; c < info->dest_components; ++c) { 374250db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres if ((int)c >= type->coordinate_components()) { 374350db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), 37443ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_d(1)); 374550db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres } else if (c == 1 && is_1d_array_image) { 374656ebd3314bfc5895fab47586fc8cda024aac4fd8Martin Peres bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), 374750db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres offset(size, bld, 2)); 374850db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres } else if (c == 2 && is_cube_array_image) { 374950db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres bld.emit(SHADER_OPCODE_INT_QUOTIENT, 375056ebd3314bfc5895fab47586fc8cda024aac4fd8Martin Peres offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), 37513ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner offset(size, bld, c), brw_imm_d(6)); 375250db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres } else { 375356ebd3314bfc5895fab47586fc8cda024aac4fd8Martin Peres bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), 375450db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres offset(size, bld, c)); 375550db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres } 375650db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres } 375750db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres 375850db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres break; 375950db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres } 376050db9c1db645c1a4d5777d2cacfd7ac74ebbe544Martin Peres 376155ebaa6d003b69c0a159a00d82a1e96f685062d6Ilia Mirkin case nir_intrinsic_image_samples: 376255ebaa6d003b69c0a159a00d82a1e96f685062d6Ilia Mirkin /* The 
driver does not support multi-sampled images. */ 37633ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); 376455ebaa6d003b69c0a159a00d82a1e96f685062d6Ilia Mirkin break; 376555ebaa6d003b69c0a159a00d82a1e96f685062d6Ilia Mirkin 376627663dbe8edfb7583d9d8fc3704a04a5c837fe05Jason Ekstrand case nir_intrinsic_load_uniform: { 376718069dce4a4c3d71e6afc6b10bfa7bee0560ba9cJason Ekstrand /* Offsets are in bytes but they should always be multiples of 4 */ 376818069dce4a4c3d71e6afc6b10bfa7bee0560ba9cJason Ekstrand assert(instr->const_index[0] % 4 == 0); 376918069dce4a4c3d71e6afc6b10bfa7bee0560ba9cJason Ekstrand 3770f3970fad9e5b04e04de366a65fed5a30da618f9dJason Ekstrand fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type); 377146c35c61e9c5c1b56fdd9fcd4eb45591dd16d21dJason Ekstrand 377278b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 377378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand if (const_offset) { 377478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand /* Offsets are in bytes but they should always be multiples of 4 */ 3775084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga assert(const_offset->u32[0] % 4 == 0); 3776950af5ed40895ba7eb664a64e869cf4ae1104fc7Francisco Jerez src.offset = const_offset->u32[0]; 37773c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand 37783c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand for (unsigned j = 0; j < instr->num_components; j++) { 37793c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand bld.MOV(offset(dest, bld, j), offset(src, bld, j)); 37803c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } 378178b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand } else { 37823c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand fs_reg indirect = retype(get_nir_src(instr->src[0]), 37833c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand BRW_REGISTER_TYPE_UD); 
37842126c68e5cba79709e228f12eb3062a9be634a0eJason Ekstrand 37853c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand /* We need to pass a size to the MOV_INDIRECT but we don't want it to 37863c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand * go past the end of the uniform. In order to keep the n'th 37873c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand * component from running past, we subtract off the size of all but 37883c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand * one component of the vector. 37893c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand */ 379075ada43a3af88835de6a83ed453d4ed512df0412Samuel Iglesias Gonsálvez assert(instr->const_index[1] >= 379175ada43a3af88835de6a83ed453d4ed512df0412Samuel Iglesias Gonsálvez instr->num_components * (int) type_sz(dest.type)); 37923c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand unsigned read_size = instr->const_index[1] - 379375ada43a3af88835de6a83ed453d4ed512df0412Samuel Iglesias Gonsálvez (instr->num_components - 1) * type_sz(dest.type); 37943c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand 379559e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez bool supports_64bit_indirects = 379659e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez !devinfo->is_cherryview && !devinfo->is_broxton; 3797bdab572a86f27b92ba10124f85d278e9c8861fffSamuel Iglesias Gonsálvez 379859e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez if (type_sz(dest.type) != 8 || supports_64bit_indirects) { 379959e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez for (unsigned j = 0; j < instr->num_components; j++) { 3800bdab572a86f27b92ba10124f85d278e9c8861fffSamuel Iglesias Gonsálvez bld.emit(SHADER_OPCODE_MOV_INDIRECT, 3801bdab572a86f27b92ba10124f85d278e9c8861fffSamuel Iglesias Gonsálvez offset(dest, bld, j), offset(src, bld, j), 3802bdab572a86f27b92ba10124f85d278e9c8861fffSamuel Iglesias Gonsálvez indirect, brw_imm_ud(read_size)); 
380359e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez } 380459e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez } else { 380559e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez const unsigned num_mov_indirects = 380659e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez type_sz(dest.type) / type_sz(BRW_REGISTER_TYPE_UD); 380759e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez /* We read a little bit less per MOV INDIRECT, as they are now 380859e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez * 32-bits ones instead of 64-bit. Fix read_size then. 380959e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez */ 381059e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez const unsigned read_size_32bit = read_size - 381159e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez (num_mov_indirects - 1) * type_sz(BRW_REGISTER_TYPE_UD); 381259e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez for (unsigned j = 0; j < instr->num_components; j++) { 381359e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez for (unsigned i = 0; i < num_mov_indirects; i++) { 381459e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez bld.emit(SHADER_OPCODE_MOV_INDIRECT, 381559e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez subscript(offset(dest, bld, j), BRW_REGISTER_TYPE_UD, i), 381659e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez subscript(offset(src, bld, j), BRW_REGISTER_TYPE_UD, i), 381759e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez indirect, brw_imm_ud(read_size_32bit)); 381859e6c0d8aee718cf58198d5a5b2adce3e01391a6Samuel Iglesias Gonsálvez } 3819bdab572a86f27b92ba10124f85d278e9c8861fffSamuel Iglesias Gonsálvez } 38203c93cdfaf598bc3c28e3dc288da35675c666602bJason Ekstrand } 38212faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 38222faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 
38232faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 38242faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 382527663dbe8edfb7583d9d8fc3704a04a5c837fe05Jason Ekstrand case nir_intrinsic_load_ubo: { 3826534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); 3827534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand fs_reg surf_index; 38282faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3829534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand if (const_index) { 3830027b64a55afc0fe8efcf9f6217192807e285c830Iago Toral Quiroga const unsigned index = stage_prog_data->binding_table.ubo_start + 3831084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga const_index->u32[0]; 38323ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner surf_index = brw_imm_ud(index); 3833027b64a55afc0fe8efcf9f6217192807e285c830Iago Toral Quiroga brw_mark_surface_used(prog_data, index); 3834534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand } else { 3835534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand /* The block index is not a constant. Evaluate the index expression 3836b234537cc3e513ded9b5385d876e4c531f72af94Francisco Jerez * per-channel and add the base UBO index; we have to select a value 3837b234537cc3e513ded9b5385d876e4c531f72af94Francisco Jerez * from any live channel. 
3838534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand */ 3839d1533d87cc7e2c39e7ce9dc838b45a2c39c96e33Kenneth Graunke surf_index = vgrf(glsl_type::uint_type); 3840979fe2ffee3956186017fe6c115aed53fc87ad3dFrancisco Jerez bld.ADD(surf_index, get_nir_src(instr->src[0]), 38413ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_ud(stage_prog_data->binding_table.ubo_start)); 3842fadf34773527779eef4622b2586d87ec00476c0fFrancisco Jerez surf_index = bld.emit_uniformize(surf_index); 38432faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3844534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand /* Assume this may touch any UBO. It would be nice to provide 3845534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand * a tighter bound, but the array information is already lowered away. 38462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott */ 3847534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand brw_mark_surface_used(prog_data, 3848534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand stage_prog_data->binding_table.ubo_start + 3849e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri nir->info->num_ubos - 1); 3850534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand } 38512faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 385278b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); 385378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand if (const_offset == NULL) { 385413ad8d03f201a4d09bf7ab9078b00807d61dfadaJason Ekstrand fs_reg base_offset = retype(get_nir_src(instr->src[1]), 3855240d16ea94834eb2472e91fd4856381951a07007Jason Ekstrand BRW_REGISTER_TYPE_UD); 38562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 3857dfb3abbaecfbe30b8858a5428c604f9d90f65505Jason Ekstrand for (int i = 0; i < instr->num_components; i++) 3858f7dcc1160331462a071c54ca1067f9e2f57b55beJason Ekstrand VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, 
3859b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga base_offset, i * type_sz(dest.type)); 3860534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand } else { 3861b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga /* Even if we are loading doubles, a pull constant load will load 3862b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga * a 32-bit vec4, so should only reserve vgrf space for that. If we 3863b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga * need to load a full dvec4 we will have to emit 2 loads. This is 3864b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga * similar to demote_pull_constants(), except that in that case we 3865b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga * see individual accesses to each component of the vector and then 3866b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga * we let CSE deal with duplicate loads. Here we see a vector access 3867b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga * and we have to split it if necessary. 3868b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga */ 3869b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez const unsigned type_size = type_sz(dest.type); 3870b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez const unsigned block_sz = 64; /* Fetch one cacheline at a time. 
*/ 3871b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0); 3872b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD); 38739b22a0d295316b7547667ebbfe1e1b6182439186Francisco Jerez 3874b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez for (unsigned c = 0; c < instr->num_components;) { 3875b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez const unsigned base = const_offset->u32[0] + c * type_size; 3876b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez /* Number of usable components in the next block-aligned load. */ 3877b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez const unsigned count = MIN2(instr->num_components - c, 3878b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez (block_sz - base % block_sz) / type_size); 3879b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga 38809b22a0d295316b7547667ebbfe1e1b6182439186Francisco Jerez ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 3881b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez packed_consts, surf_index, 3882b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez brw_imm_ud(base & ~(block_sz - 1))); 3883b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga 3884b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez const fs_reg consts = 3885b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez retype(byte_offset(packed_consts, base & (block_sz - 1)), 3886b56fa830c6095f8226456b2aeb62f2dfad804be5Francisco Jerez dest.type); 3887534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand 3888b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez for (unsigned d = 0; d < count; d++) 3889b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez bld.MOV(offset(dest, bld, c + d), component(consts, d)); 3890534d145e5ea039d57833395a36eed90721f6b272Jason Ekstrand 3891b46867cd378e5fb135fd060d50c8028d3dac622aFrancisco Jerez c += count; 
3892b86d4780ed203b2a22afba5f95c73b15165a7259Iago Toral Quiroga } 38932faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 38942faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 38952faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 38962faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 38975b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga case nir_intrinsic_load_ssbo: { 38985b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga assert(devinfo->gen >= 7); 38995b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 39005b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga nir_const_value *const_uniform_block = 39015b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga nir_src_as_const_value(instr->src[0]); 39025b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 39035b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga fs_reg surf_index; 39045b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga if (const_uniform_block) { 3905d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga unsigned index = stage_prog_data->binding_table.ssbo_start + 3906084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga const_uniform_block->u32[0]; 39073ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner surf_index = brw_imm_ud(index); 39085b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga brw_mark_surface_used(prog_data, index); 39095b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga } else { 39105b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga surf_index = vgrf(glsl_type::uint_type); 39115b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga bld.ADD(surf_index, get_nir_src(instr->src[0]), 39123ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); 39135b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 39145b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga /* Assume this may touch any UBO. 
It would be nice to provide 39155b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga * a tighter bound, but the array information is already lowered away. 39165b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga */ 39175b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga brw_mark_surface_used(prog_data, 3918d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga stage_prog_data->binding_table.ssbo_start + 3919e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri nir->info->num_ssbos - 1); 39205b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga } 39215b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 3922feff21d1a6ba49a0d6f7526e1ff473a0b574c92eKristian Høgsberg Kristensen fs_reg offset_reg; 392378b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); 392478b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand if (const_offset) { 3925084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga offset_reg = brw_imm_ud(const_offset->u32[0]); 39265b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga } else { 392778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand offset_reg = get_nir_src(instr->src[1]); 39285b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga } 39295b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 39305b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga /* Read the vector */ 393133f7ec18ac399719df06ab7031cb43965e6793beIago Toral Quiroga do_untyped_vector_read(bld, dest, surf_index, offset_reg, 393233f7ec18ac399719df06ab7031cb43965e6793beIago Toral Quiroga instr->num_components); 39335b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 39345b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga break; 39355b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga } 39365b186aafe7a8d3f96a99ad2fddd2bff99d99e923Iago Toral Quiroga 3937337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga case 
nir_intrinsic_store_ssbo: { 3938337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga assert(devinfo->gen >= 7); 3939337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 39403fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand if (stage == MESA_SHADER_FRAGMENT) 3941e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke brw_wm_prog_data(prog_data)->has_side_effects = true; 39423fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand 3943337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga /* Block index */ 3944337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga fs_reg surf_index; 3945337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga nir_const_value *const_uniform_block = 3946337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga nir_src_as_const_value(instr->src[1]); 3947337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga if (const_uniform_block) { 3948d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga unsigned index = stage_prog_data->binding_table.ssbo_start + 3949084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga const_uniform_block->u32[0]; 39503ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner surf_index = brw_imm_ud(index); 3951337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga brw_mark_surface_used(prog_data, index); 3952337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga } else { 3953337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga surf_index = vgrf(glsl_type::uint_type); 3954337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga bld.ADD(surf_index, get_nir_src(instr->src[1]), 39553ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); 3956337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 3957337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga brw_mark_surface_used(prog_data, 3958d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga stage_prog_data->binding_table.ssbo_start + 
3959e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri nir->info->num_ssbos - 1); 3960337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga } 3961337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 3962337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga /* Value */ 3963337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga fs_reg val_reg = get_nir_src(instr->src[0]); 3964337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 3965337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga /* Writemask */ 396678b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand unsigned writemask = instr->const_index[0]; 3967337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 3968943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga /* get_nir_src() retypes to integer. Be wary of 64-bit types though 3969943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga * since the untyped writes below operate in units of 32-bits, which 3970943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga * means that we need to write twice as many components each time. 3971943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga * Also, we have to suffle 64-bit data to be in the appropriate layout 3972943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga * expected by our 32-bit write messages. 3973943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga */ 3974943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga unsigned type_size = 4; 3975943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga unsigned bit_size = instr->src[0].is_ssa ? 
3976943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size; 3977943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga if (bit_size == 64) { 3978943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga type_size = 8; 3979943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga fs_reg tmp = 3980943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga fs_reg(VGRF, alloc.allocate(alloc.sizes[val_reg.nr]), val_reg.type); 3981943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga shuffle_64bit_data_for_32bit_write(bld, 3982943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga retype(tmp, BRW_REGISTER_TYPE_F), 3983943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga retype(val_reg, BRW_REGISTER_TYPE_DF), 3984943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga instr->num_components); 3985943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga val_reg = tmp; 3986943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga } 3987943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga 3988943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga unsigned type_slots = type_size / 4; 3989943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga 39900cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen /* Combine groups of consecutive enabled channels in one write 39910cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen * message. We use ffs to find the first enabled channel and then ffs on 39920cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen * the bit-inverse, down-shifted writemask to determine the length of 39930cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen * the block of enabled bits. 
39940cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen */ 39950cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen while (writemask) { 39960cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen unsigned first_component = ffs(writemask) - 1; 39970cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen unsigned length = ffs(~(writemask >> first_component)) - 1; 3998337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 3999943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga /* We can't write more than 2 64-bit components at once. Limit the 4000943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga * length of the write to what we can do and let the next iteration 4001943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga * handle the rest 4002943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga */ 4003943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga if (type_size > 4) 4004943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga length = MIN2(2, length); 4005943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga 400678b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand fs_reg offset_reg; 400778b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); 400878b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand if (const_offset) { 4009943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga offset_reg = brw_imm_ud(const_offset->u32[0] + 4010943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga type_size * first_component); 40110cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen } else { 40120cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen offset_reg = vgrf(glsl_type::uint_type); 40130cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen bld.ADD(offset_reg, 40140cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen 
retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), 4015943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga brw_imm_ud(type_size * first_component)); 4016337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga } 4017337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 4018943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga 40190cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen emit_untyped_write(bld, surf_index, offset_reg, 4020943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga offset(val_reg, bld, first_component * type_slots), 4021943f9442bf7943a992730e642e91ed874d50790cIago Toral Quiroga 1 /* dims */, length * type_slots, 40220cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen BRW_PREDICATE_NONE); 40230cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen 40240cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen /* Clear the bits in the writemask that we just wrote, then try 40250cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen * again to see if more channels are left. 
40260cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen */ 40270cb7d7b4b7c32246d4c4225a1d17d7ff79a7526dKristian Høgsberg Kristensen writemask &= (15 << (first_component + length)); 4028337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga } 4029337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga break; 4030337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga } 4031337dad8ceeb4f313a47b4ddb31805f355c3fc3a5Iago Toral Quiroga 403227663dbe8edfb7583d9d8fc3704a04a5c837fe05Jason Ekstrand case nir_intrinsic_store_output: { 40332faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott fs_reg src = get_nir_src(instr->src[0]); 4034f3970fad9e5b04e04de366a65fed5a30da618f9dJason Ekstrand 403578b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); 403678b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand assert(const_offset && "Indirect output stores not allowed"); 403759864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke fs_reg new_dest = retype(offset(outputs[instr->const_index[0]], bld, 403859864e8e02057cc6fa0448a8af067a3cf53389daKenneth Graunke 4 * const_offset->u32[0]), src.type); 403978b81be627734ea7fa50ea246c07b0d4a3a1638aJason Ekstrand 404066192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga unsigned num_components = instr->num_components; 40417f53fead5cf9a85c74a94d359dd5fccfbb87856cTimothy Arceri unsigned first_component = nir_intrinsic_component(instr); 404266192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga unsigned bit_size = instr->src[0].is_ssa ? 
404366192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size; 404466192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga if (bit_size == 64) { 404566192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga fs_reg tmp = 404666192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga fs_reg(VGRF, alloc.allocate(2 * num_components), 404766192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga BRW_REGISTER_TYPE_F); 404866192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga shuffle_64bit_data_for_32bit_write( 404966192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga bld, tmp, retype(src, BRW_REGISTER_TYPE_DF), num_components); 405066192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga src = retype(tmp, src.type); 405166192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga num_components *= 2; 405266192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga } 405366192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga 405466192b3c16b09fa7ba97574103fc3d883b3cbfdbIago Toral Quiroga for (unsigned j = 0; j < num_components; j++) { 40557f53fead5cf9a85c74a94d359dd5fccfbb87856cTimothy Arceri bld.MOV(offset(new_dest, bld, j + first_component), 40567f53fead5cf9a85c74a94d359dd5fccfbb87856cTimothy Arceri offset(src, bld, j)); 40572faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 40582faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 40592faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 40602faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 406114af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga case nir_intrinsic_ssbo_atomic_add: 406214af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr); 406314af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 4064baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga case nir_intrinsic_ssbo_atomic_imin: 4065baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga 
nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr); 406614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 4067baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga case nir_intrinsic_ssbo_atomic_umin: 4068baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr); 4069baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga break; 4070baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga case nir_intrinsic_ssbo_atomic_imax: 4071baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr); 4072baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga break; 4073baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga case nir_intrinsic_ssbo_atomic_umax: 4074baee16bf02eedc6a32381d79da6c7ac942f782aeIago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr); 407514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 407614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga case nir_intrinsic_ssbo_atomic_and: 407714af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr); 407814af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 407914af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga case nir_intrinsic_ssbo_atomic_or: 408014af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr); 408114af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 408214af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga case nir_intrinsic_ssbo_atomic_xor: 408314af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr); 408414af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 408514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga case nir_intrinsic_ssbo_atomic_exchange: 408614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_emit_ssbo_atomic(bld, 
BRW_AOP_MOV, instr); 408714af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 408814af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga case nir_intrinsic_ssbo_atomic_comp_swap: 408914af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr); 409014af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga break; 409114af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 4092f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez case nir_intrinsic_get_buffer_size: { 4093f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); 4094084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; 4095f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez 4096796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez /* A resinfo's sampler message is used to get the buffer size. The 4097796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez * SIMD8's writeback message consists of four registers and SIMD16's 4098796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez * writeback message consists of 8 destination registers (two per each 4099796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez * component). Because we are only interested on the first channel of 4100796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez * the first returned component, where resinfo returns the buffer size 4101796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez * for SURFTYPE_BUFFER, we can just use the SIMD8 variant regardless of 4102796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez * the dispatch width. 
4103796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez */ 4104796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez const fs_builder ubld = bld.exec_all().group(8, 0); 4105796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez fs_reg src_payload = ubld.vgrf(BRW_REGISTER_TYPE_UD); 4106796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez fs_reg ret_payload = ubld.vgrf(BRW_REGISTER_TYPE_UD, 4); 4107f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez 4108796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez /* Set LOD = 0 */ 4109796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez ubld.MOV(src_payload, brw_imm_d(0)); 4110d4fdb84f80dd3dbad2b71ea6e877f24dc625aa2aSamuel Iglesias Gonsálvez 4111eea3c907cc480a105224b21be51d62bc64ea1057Iago Toral Quiroga const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index; 4112796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez fs_inst *inst = ubld.emit(FS_OPCODE_GET_BUFFER_SIZE, ret_payload, 4113796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez src_payload, brw_imm_ud(index)); 4114f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez inst->header_size = 0; 4115796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez inst->mlen = 1; 411669570bbad876bb9da609c3b651aacda28cecc542Francisco Jerez inst->size_written = 4 * REG_SIZE; 4117eea3c907cc480a105224b21be51d62bc64ea1057Iago Toral Quiroga 4118796238d9e6eee0b942d34c57bd8bdf0f9c98b6c3Francisco Jerez bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0)); 4119eea3c907cc480a105224b21be51d62bc64ea1057Iago Toral Quiroga brw_mark_surface_used(prog_data, index); 4120f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez break; 4121f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez } 4122f5dd2c182275a9de57e5186491012c402a6248e0Samuel Iglesias Gonsálvez 41238f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen case nir_intrinsic_load_channel_num: { 
41248f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW); 41258f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen dest = retype(dest, BRW_REGISTER_TYPE_UD); 41268f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen const fs_builder allbld8 = bld.group(8, 0).exec_all(); 41278f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen allbld8.MOV(tmp, brw_imm_v(0x76543210)); 41288f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen if (dispatch_width > 8) 41298f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen allbld8.ADD(byte_offset(tmp, 16), tmp, brw_imm_uw(8u)); 41308f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen if (dispatch_width > 16) { 41318f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen const fs_builder allbld16 = bld.group(16, 0).exec_all(); 41328f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen allbld16.ADD(byte_offset(tmp, 32), tmp, brw_imm_uw(16u)); 41338f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen } 41348f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen bld.MOV(dest, tmp); 41358f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen break; 41368f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen } 41378f48d23e0fcc0809f6397a67c26751a45a95e076Jordan Justen 41382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 41392faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("unknown intrinsic"); 41402faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 41412faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 41422faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 41432faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 414414af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quirogafs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, 414514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga int op, nir_intrinsic_instr *instr) 414614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga{ 41473fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand if (stage == 
MESA_SHADER_FRAGMENT) 4148e51e055fcdf8107aafaba358fa65b00f963e1728Kenneth Graunke brw_wm_prog_data(prog_data)->has_side_effects = true; 41493fb289f957a8a27349a6f7df03983f92d9b6cf64Jason Ekstrand 415014af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga fs_reg dest; 415114af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga if (nir_intrinsic_infos[instr->intrinsic].has_dest) 415214af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga dest = get_nir_dest(instr->dest); 415314af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 415414af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga fs_reg surface; 415514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); 415614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga if (const_surface) { 4157d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga unsigned surf_index = stage_prog_data->binding_table.ssbo_start + 4158084b24f5582567ebf5aa94b7f40ae3bdcb71316bIago Toral Quiroga const_surface->u32[0]; 41593ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner surface = brw_imm_ud(surf_index); 416014af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga brw_mark_surface_used(prog_data, surf_index); 416114af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga } else { 416214af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga surface = vgrf(glsl_type::uint_type); 416314af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga bld.ADD(surface, get_nir_src(instr->src[0]), 41643ccc41ecfc5e9345a1c291748d8840984f7413aeMatt Turner brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); 416514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 4166d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga /* Assume this may touch any SSBO. This is the same we do for other 416714af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga * UBO/SSBO accesses with non-constant surface. 
416814af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga */ 416914af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga brw_mark_surface_used(prog_data, 4170d3f45888045c84b2bc382a34d169a0ede4774a24Iago Toral Quiroga stage_prog_data->binding_table.ssbo_start + 4171e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9bTimothy Arceri nir->info->num_ssbos - 1); 417214af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga } 417314af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 417414af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga fs_reg offset = get_nir_src(instr->src[1]); 417514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga fs_reg data1 = get_nir_src(instr->src[2]); 417614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga fs_reg data2; 417714af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga if (op == BRW_AOP_CMPWR) 417814af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga data2 = get_nir_src(instr->src[3]); 417914af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 4180be1c53d2cf2b12655ff69caac49cca75a55e63e0Kenneth Graunke /* Emit the actual atomic operation */ 418114af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 4182ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, 4183ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro data1, data2, 4184ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro 1 /* dims */, 1 /* rsize */, 4185ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro op, 4186ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro BRW_PREDICATE_NONE); 4187e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen dest.type = atomic_result.type; 4188e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen bld.MOV(dest, atomic_result); 4189e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen} 4190e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen 4191e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justenvoid 
4192e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justenfs_visitor::nir_emit_shared_atomic(const fs_builder &bld, 4193e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen int op, nir_intrinsic_instr *instr) 4194e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen{ 4195e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen fs_reg dest; 4196e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen if (nir_intrinsic_infos[instr->intrinsic].has_dest) 4197e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen dest = get_nir_dest(instr->dest); 4198e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen 4199e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); 420076e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke fs_reg offset; 4201e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen fs_reg data1 = get_nir_src(instr->src[1]); 4202e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen fs_reg data2; 4203e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen if (op == BRW_AOP_CMPWR) 4204e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen data2 = get_nir_src(instr->src[2]); 4205e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen 420676e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke /* Get the offset */ 420776e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 420876e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke if (const_offset) { 420976e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); 421076e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke } else { 421176e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke offset = vgrf(glsl_type::uint_type); 421276e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke bld.ADD(offset, 421376e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), 
421476e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke brw_imm_ud(instr->const_index[0])); 421576e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke } 421676e161056a424e5b9c35b02a9f4e520c8c44cf2bKenneth Graunke 4217e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen /* Emit the actual atomic operation operation */ 4218e288b4a133f1ea8208cd219545a72805ed5a91c6Jordan Justen 4219ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, 4220ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro data1, data2, 4221ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro 1 /* dims */, 1 /* rsize */, 4222ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro op, 4223ef76ea4ba97d0ac122491fd3f1b2bbb8e4163150Alejandro Piñeiro BRW_PREDICATE_NONE); 422414af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga dest.type = atomic_result.type; 422514af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga bld.MOV(dest, atomic_result); 422614af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga} 422714af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quiroga 422814af6f4698a9f60c080b9adda4d3b4c45b157bd7Iago Toral Quirogavoid 4229bf83a1a219af8bf82c3c721888bbe0dfc3eced34Francisco Jerezfs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) 42302faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 4231ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand unsigned texture = instr->texture_index; 4232b8ab9c8c8674d67e09c1134ca44b37e0a611f5b5Jason Ekstrand unsigned sampler = instr->sampler_index; 42332faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 4234a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; 42352faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 4236a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(texture); 4237a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand 
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(sampler); 42382faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 4239cfc175b40995ca4e590cd30897f6bb017e1376a3Chad Versace int lod_components = 0; 42402faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 424180c72a8ea7b1018661da0e6509a7f88ca1f5086fJason Ekstrand /* The hardware requires a LOD for buffer textures */ 424280c72a8ea7b1018661da0e6509a7f88ca1f5086fJason Ekstrand if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) 4243a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_d(0); 424480c72a8ea7b1018661da0e6509a7f88ca1f5086fJason Ekstrand 4245faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand uint32_t header_bits = 0; 42462faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott for (unsigned i = 0; i < instr->num_srcs; i++) { 42474aa6162f6ecf96c7400c17c310eba0cfd0f5e083Jason Ekstrand fs_reg src = get_nir_src(instr->src[i].src); 42484aa6162f6ecf96c7400c17c310eba0cfd0f5e083Jason Ekstrand switch (instr->src[i].src_type) { 42492faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_bias: 425007353599e07529e98494057f556b9d96c1df5cfdMatt Turner srcs[TEX_LOGICAL_SRC_LOD] = 425107353599e07529e98494057f556b9d96c1df5cfdMatt Turner retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_F); 42522faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 4253fd249c803e3ae2acb83f5e3b7152728e73228b7bIlia Mirkin case nir_tex_src_comparator: 4254a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_SHADOW_C] = retype(src, BRW_REGISTER_TYPE_F); 42552faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 42562faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_coord: 4257c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand switch (instr->op) { 4258c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand case nir_texop_txf: 4259c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand case nir_texop_txf_ms: 
426050e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand case nir_texop_txf_ms_mcs: 426199840eb983f74cd447546f7205c8c9f505ef82c8Ian Romanick case nir_texop_samples_identical: 4262a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, BRW_REGISTER_TYPE_D); 4263c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand break; 4264c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand default: 4265a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, BRW_REGISTER_TYPE_F); 4266c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand break; 4267c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand } 42682faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 42692faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_ddx: 4270a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F); 42712faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott lod_components = nir_tex_instr_src_size(instr, i); 42722faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 42732faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_ddy: 4274a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_LOD2] = retype(src, BRW_REGISTER_TYPE_F); 42752faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 42762faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_lod: 4277c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand switch (instr->op) { 4278c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand case nir_texop_txs: 427907353599e07529e98494057f556b9d96c1df5cfdMatt Turner srcs[TEX_LOGICAL_SRC_LOD] = 428007353599e07529e98494057f556b9d96c1df5cfdMatt Turner retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_UD); 4281c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand break; 4282c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand case nir_texop_txf: 
428307353599e07529e98494057f556b9d96c1df5cfdMatt Turner srcs[TEX_LOGICAL_SRC_LOD] = 428407353599e07529e98494057f556b9d96c1df5cfdMatt Turner retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_D); 4285c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand break; 4286c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand default: 428707353599e07529e98494057f556b9d96c1df5cfdMatt Turner srcs[TEX_LOGICAL_SRC_LOD] = 428807353599e07529e98494057f556b9d96c1df5cfdMatt Turner retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_F); 4289c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand break; 4290c181ff268e4787056fdee417d30d52b1098fe211Jason Ekstrand } 42912faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 42922faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_ms_index: 4293a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_REGISTER_TYPE_UD); 42942faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 42958750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand 42968750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand case nir_tex_src_offset: { 42978750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand nir_const_value *const_offset = 42988750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand nir_src_as_const_value(instr->src[i].src); 42994f2d1d6ea713df8f8d816b48b9e99c7117cf36d7Ilia Mirkin unsigned offset_bits = 0; 43004f2d1d6ea713df8f8d816b48b9e99c7117cf36d7Ilia Mirkin if (const_offset && 43014f2d1d6ea713df8f8d816b48b9e99c7117cf36d7Ilia Mirkin brw_texture_offset(const_offset->i32, 43024f2d1d6ea713df8f8d816b48b9e99c7117cf36d7Ilia Mirkin nir_tex_instr_src_size(instr, i), 43034f2d1d6ea713df8f8d816b48b9e99c7117cf36d7Ilia Mirkin &offset_bits)) { 43044f2d1d6ea713df8f8d816b48b9e99c7117cf36d7Ilia Mirkin header_bits |= offset_bits; 43058750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand } else { 4306faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand 
srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = 4307a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand retype(src, BRW_REGISTER_TYPE_D); 43088750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand } 43092faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 43108750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand } 43118750299a420af76cebd3067f6f603eacde06ae06Jason Ekstrand 43122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_tex_src_projector: 4313cb966fb2bea77b1d7b1bdb6597b7b85d810f2d0aEric Anholt unreachable("should be lowered"); 43143c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand 4315ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand case nir_tex_src_texture_offset: { 4316ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand /* Figure out the highest possible texture index and mark it as used */ 4317ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand uint32_t max_used = texture + instr->texture_array_size - 1; 431828e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { 43193c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand max_used += stage_prog_data->binding_table.gather_texture_start; 43203c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand } else { 43213c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand max_used += stage_prog_data->binding_table.texture_start; 43223c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand } 43233c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand brw_mark_surface_used(prog_data, max_used); 43243c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand 43253c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand /* Emit code to evaluate the actual indexing expression */ 4326a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand fs_reg tmp = vgrf(glsl_type::uint_type); 4327a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand bld.ADD(tmp, src, brw_imm_ud(texture)); 4328a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand 
srcs[TEX_LOGICAL_SRC_SURFACE] = bld.emit_uniformize(tmp); 43293c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand break; 43303c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand } 43313c2c0a164c2308a5777d7a59b6da4b44a57ba6e2Jason Ekstrand 4332b8ab9c8c8674d67e09c1134ca44b37e0a611f5b5Jason Ekstrand case nir_tex_src_sampler_offset: { 4333b8ab9c8c8674d67e09c1134ca44b37e0a611f5b5Jason Ekstrand /* Emit code to evaluate the actual indexing expression */ 4334a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand fs_reg tmp = vgrf(glsl_type::uint_type); 4335a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand bld.ADD(tmp, src, brw_imm_ud(sampler)); 4336a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_SAMPLER] = bld.emit_uniformize(tmp); 4337b8ab9c8c8674d67e09c1134ca44b37e0a611f5b5Jason Ekstrand break; 4338b8ab9c8c8674d67e09c1134ca44b37e0a611f5b5Jason Ekstrand } 43395ec456375e4fdd0b6c7d797f99191044e19ead74Jason Ekstrand 434050e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand case nir_tex_src_ms_mcs: 434150e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand assert(instr->op == nir_texop_txf_ms); 434250e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand srcs[TEX_LOGICAL_SRC_MCS] = retype(src, BRW_REGISTER_TYPE_D); 434350e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand break; 434450e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand 434544997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg case nir_tex_src_plane: { 434644997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg nir_const_value *const_plane = 434744997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg nir_src_as_const_value(instr->src[i].src); 434844997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg const uint32_t plane = const_plane->u32[0]; 434944997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg const uint32_t texture_index = 435044997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg instr->texture_index + 
435144997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg stage_prog_data->binding_table.plane_start[plane] - 435244997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg stage_prog_data->binding_table.texture_start; 435344997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg 435444997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(texture_index); 435544997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg break; 435644997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg } 435744997fc0c1cc7f24216e3b1c5d954919df946ee5Kristian Høgsberg 43582faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 43592faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("unknown texture source"); 43602faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 43612faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 43622faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 436350e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand if (srcs[TEX_LOGICAL_SRC_MCS].file == BAD_FILE && 436450e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand (instr->op == nir_texop_txf_ms || 436550e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand instr->op == nir_texop_samples_identical)) { 436628e9601d0e681411b60a7de8be9f401b0df77d29Jason Ekstrand if (devinfo->gen >= 7 && 4367ee85014b90af1d94d637ec763a803479e9bac5dcJason Ekstrand key_tex->compressed_multisample_layout_mask & (1 << texture)) { 4368a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_MCS] = 4369a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE], 4370a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand instr->coord_components, 4371a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_SURFACE]); 43721d8ef6ba606a88239de633e5abcc19471c9d3cf4Kenneth Graunke } else { 4373a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_MCS] = brw_imm_ud(0u); 
43741d8ef6ba606a88239de633e5abcc19471c9d3cf4Kenneth Graunke } 43752faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 43762faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 4377a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components); 4378a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components); 4379a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand 4380a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand enum opcode opcode; 43812faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott switch (instr->op) { 4382a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_tex: 43830c754d1c4203d87dbb9d2dd882ef42686e6d01ecFrancisco Jerez opcode = (stage == MESA_SHADER_FRAGMENT ? SHADER_OPCODE_TEX_LOGICAL : 43840c754d1c4203d87dbb9d2dd882ef42686e6d01ecFrancisco Jerez SHADER_OPCODE_TXL_LOGICAL); 4385a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4386a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_txb: 4387a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = FS_OPCODE_TXB_LOGICAL; 4388a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4389a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_txl: 4390a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TXL_LOGICAL; 4391a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4392a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_txd: 4393a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TXD_LOGICAL; 4394a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4395a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_txf: 4396a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TXF_LOGICAL; 4397a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 
4398a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_txf_ms: 4399a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand if ((key_tex->msaa_16 & (1 << sampler))) 4400a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL; 4401a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand else 4402a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TXF_CMS_LOGICAL; 4403a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 440450e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand case nir_texop_txf_ms_mcs: 440550e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand opcode = SHADER_OPCODE_TXF_MCS_LOGICAL; 440650e5e1f747ad820eb491e093600a4bde9c13efbaJason Ekstrand break; 4407a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_query_levels: 4408a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_txs: 4409a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TXS_LOGICAL; 4410a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4411a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_lod: 4412a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_LOD_LOGICAL; 4413a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4414a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand case nir_texop_tg4: 4415faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand if (srcs[TEX_LOGICAL_SRC_TG4_OFFSET].file != BAD_FILE) 4416a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL; 4417a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand else 4418a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand opcode = SHADER_OPCODE_TG4_LOGICAL; 4419a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand break; 4420a8e7b4f1d9ec50d2214e7694da26af6a108e506fFrancisco Jerez case nir_texop_texture_samples: 4421a8e7b4f1d9ec50d2214e7694da26af6a108e506fFrancisco 
Jerez opcode = SHADER_OPCODE_SAMPLEINFO_LOGICAL; 4422a8e7b4f1d9ec50d2214e7694da26af6a108e506fFrancisco Jerez break; 44237bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand case nir_texop_samples_identical: { 44247bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D); 44257bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand 44267bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand /* If mcs is an immediate value, it means there is no MCS. In that case 44277bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand * just return false. 44287bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand */ 44297bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand if (srcs[TEX_LOGICAL_SRC_MCS].file == BRW_IMMEDIATE_VALUE) { 44307bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand bld.MOV(dst, brw_imm_ud(0u)); 44317bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand } else if ((key_tex->msaa_16 & (1 << sampler))) { 44327bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand fs_reg tmp = vgrf(glsl_type::uint_type); 44337bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand bld.OR(tmp, srcs[TEX_LOGICAL_SRC_MCS], 44347bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand offset(srcs[TEX_LOGICAL_SRC_MCS], bld, 1)); 44357bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); 44367bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand } else { 44377bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand bld.CMP(dst, srcs[TEX_LOGICAL_SRC_MCS], brw_imm_ud(0u), 44387bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand BRW_CONDITIONAL_EQ); 44397bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand } 44407bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand return; 44417bc987abe0dc863b091bf77f5b02138ebe79e559Jason Ekstrand } 44422faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 44432faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 
unreachable("unknown texture opcode"); 44442faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 44452faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 4446f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand /* TXS and TXL require a LOD but not everything we implement using those 4447f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand * two opcodes provides one. Provide a default LOD of 0. 4448f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand */ 4449f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand if ((opcode == SHADER_OPCODE_TXS_LOGICAL || 4450f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand opcode == SHADER_OPCODE_TXL_LOGICAL) && 4451f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE) { 4452f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u); 4453f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand } 4454f77cecf08cf9fba5e8f62e8ac1731c1916a97618Jason Ekstrand 4455faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand if (instr->op == nir_texop_tg4) { 4456faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand if (instr->component == 1 && 4457faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand key_tex->gather_channel_quirk_mask & (1 << texture)) { 4458faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand /* gather4 sampler is broken for green channel on RG32F -- 4459faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand * we must ask for blue instead. 
4460faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand */ 4461faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand header_bits |= 2 << 16; 4462faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand } else { 4463faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand header_bits |= instr->component << 16; 4464faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand } 4465faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand } 4466faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand 4467a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand fs_reg dst = bld.vgrf(brw_type_for_nir_type(instr->dest_type), 4); 4468a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); 4469faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand inst->offset = header_bits; 447013195f7ef85e0923a7b7d5b8a35eb6b6c257db1cKenneth Graunke 4471a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand const unsigned dest_size = nir_tex_instr_dest_size(instr); 4472a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand if (devinfo->gen >= 9 && 4473a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) { 4474a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand unsigned write_mask = instr->dest.is_ssa ? 
4475a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand nir_ssa_def_components_read(&instr->dest.ssa): 4476a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand (1 << dest_size) - 1; 447713195f7ef85e0923a7b7d5b8a35eb6b6c257db1cKenneth Graunke assert(write_mask != 0); /* dead code should have been eliminated */ 44782d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = util_last_bit(write_mask) * 44792d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->dst.component_size(inst->exec_size); 4480a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand } else { 44812d7d4a791083ff63f37ac1e40bfe8b448e7f8045Francisco Jerez inst->size_written = 4 * inst->dst.component_size(inst->exec_size); 4482a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand } 4483a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand 4484a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE) 4485a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand inst->shadow_compare = true; 4486a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand 4487faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand if (instr->op == nir_texop_tg4 && devinfo->gen == 6) 4488faf20df143a63e58aa729446f21c38ae39a438f2Jason Ekstrand emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst); 4489a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand 44903ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand fs_reg nir_dest[4]; 44913ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand for (unsigned i = 0; i < dest_size; i++) 44923ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand nir_dest[i] = offset(dst, bld, i); 449313195f7ef85e0923a7b7d5b8a35eb6b6c257db1cKenneth Graunke 44943ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand if (instr->op == nir_texop_query_levels) { 44953ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand /* # levels is in .w */ 44963ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand nir_dest[0] = offset(dst, 
bld, 3); 449796dfed49e47eac7afc100e5b8d3b316dd6652fb6Jason Ekstrand } else if (instr->op == nir_texop_txs && 449896dfed49e47eac7afc100e5b8d3b316dd6652fb6Jason Ekstrand dest_size >= 3 && devinfo->gen < 7) { 449996dfed49e47eac7afc100e5b8d3b316dd6652fb6Jason Ekstrand /* Gen4-6 return 0 instead of 1 for single layer surfaces. */ 4500a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand fs_reg depth = offset(dst, bld, 2); 450196dfed49e47eac7afc100e5b8d3b316dd6652fb6Jason Ekstrand nir_dest[2] = vgrf(glsl_type::int_type); 450296dfed49e47eac7afc100e5b8d3b316dd6652fb6Jason Ekstrand bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE); 4503a815499294afb485fe6773fba9ba12fa6773c654Jason Ekstrand } 45042faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 45053ba228f9978cbabc2b4731327454dd91a208c317Jason Ekstrand bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0); 45062faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 45072faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott 45082faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbottvoid 45093632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerezfs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) 45102faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott{ 45112faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott switch (instr->type) { 45122faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_jump_break: 45133632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.emit(BRW_OPCODE_BREAK); 45142faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 45152faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_jump_continue: 45163632c28bde071950dc57e42eb62a65fb838c8bdcFrancisco Jerez bld.emit(BRW_OPCODE_CONTINUE); 45172faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott break; 45182faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott case nir_jump_return: 45192faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott default: 
45202faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott unreachable("unknown jump"); 45212faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott } 45222faf7f87d6a1c00b3f3d3907178a2eeeefa5d2a9Connor Abbott} 452350b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga 452450b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga/** 452550b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * This helper takes the result of a load operation that reads 32-bit elements 452650b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * in this format: 452750b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * 452850b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * x x x x x x x x 452950b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * y y y y y y y y 453050b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * z z z z z z z z 453150b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * w w w w w w w w 453250b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * 453350b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * and shuffles the data to get this: 453450b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * 453550b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * x y x y x y x y 453650b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * x y x y x y x y 453750b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * z w z w z w z w 453850b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * z w z w z w z w 453950b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * 454050b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * Which is exactly what we want if the load is reading 64-bit components 454150b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * like doubles, where x represents the low 32-bit of the x double component 454250b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * and y represents the high 32-bit of the x double component (likewise with 
454350b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * z and w for double component y). The parameter @components represents 454450b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * the number of 64-bit components present in @src. This would typically be 454550b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * 2 at most, since we can only fit 2 double elements in the result of a 454650b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * vec4 load. 454750b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * 454850b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * Notice that @dst and @src can be the same register. 454950b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga */ 455050b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quirogavoid 455150b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quirogashuffle_32bit_load_result_to_64bit_data(const fs_builder &bld, 455250b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga const fs_reg &dst, 455350b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga const fs_reg &src, 455450b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga uint32_t components) 455550b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga{ 455650b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga assert(type_sz(src.type) == 4); 455750b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga assert(type_sz(dst.type) == 8); 455850b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga 455950b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga /* A temporary that we will use to shuffle the 32-bit data of each 456050b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * component in the vector into valid 64-bit data. 
We can't write directly 456150b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * to dst because dst can be (and would usually be) the same as src 456250b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * and in that case the first MOV in the loop below would overwrite the 456350b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga * data read in the second MOV. 456450b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga */ 456550b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga fs_reg tmp = bld.vgrf(dst.type); 456650b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga 456750b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga for (unsigned i = 0; i < components; i++) { 456850b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga const fs_reg component_i = offset(src, bld, 2 * i); 456950b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga 457050b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga bld.MOV(subscript(tmp, src.type, 0), component_i); 457150b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga bld.MOV(subscript(tmp, src.type, 1), offset(component_i, bld, 1)); 457250b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga 457350b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga bld.MOV(offset(dst, bld, i), tmp); 457450b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga } 457550b7676dc46bae39c5e9b779828ef4fb2e1fbefcIago Toral Quiroga} 4576b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga 4577b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga/** 4578b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * This helper does the inverse operation of 4579b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA. 
4580b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * 4581b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * We need to do this when we are going to use untyped write messsages that 4582b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * operate with 32-bit components in order to arrange our 64-bit data to be 4583b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * in the expected layout. 4584b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * 4585b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * Notice that callers of this function, unlike in the case of the inverse 4586b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * operation, would typically need to call this with dst and src being 4587b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * different registers, since they would otherwise corrupt the original 4588b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * 64-bit data they are about to write. Because of this the function checks 4589b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga * that the src and dst regions involved in the operation do not overlap. 
4590b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga */ 4591b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quirogavoid 4592b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quirogashuffle_64bit_data_for_32bit_write(const fs_builder &bld, 4593b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga const fs_reg &dst, 4594b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga const fs_reg &src, 4595b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga uint32_t components) 4596b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga{ 4597b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga assert(type_sz(src.type) == 8); 4598b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga assert(type_sz(dst.type) == 4); 4599b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga 4600c057278c065747c1f53579504bf109cafb7cb390Francisco Jerez assert(!regions_overlap( 4601c057278c065747c1f53579504bf109cafb7cb390Francisco Jerez dst, 2 * components * dst.component_size(bld.dispatch_width()), 4602c057278c065747c1f53579504bf109cafb7cb390Francisco Jerez src, components * src.component_size(bld.dispatch_width()))); 4603b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga 4604b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga for (unsigned i = 0; i < components; i++) { 4605b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga const fs_reg component_i = offset(src, bld, i); 4606b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga bld.MOV(offset(dst, bld, 2 * i), subscript(component_i, dst.type, 0)); 4607b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga bld.MOV(offset(dst, bld, 2 * i + 1), subscript(component_i, dst.type, 1)); 4608b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga } 4609b9aa66aa516c100d5476ee966f428aaf743d786cIago Toral Quiroga} 46109e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga 46119e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quirogafs_reg 
46129e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quirogasetup_imm_df(const fs_builder &bld, double v) 46139e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga{ 4614527f37199929932300acc1688d8160e1f3b1d753Jason Ekstrand const struct gen_device_info *devinfo = bld.shader->devinfo; 46159e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga assert(devinfo->gen >= 7); 46169e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga 46179e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga if (devinfo->gen >= 8) 46189e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga return brw_imm_df(v); 46199e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga 462094135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez /* gen7.5 does not support DF immediates straighforward but the DIM 462194135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez * instruction allows to set the 64-bit immediate value. 462294135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez */ 462394135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez if (devinfo->is_haswell) { 4624c5ae6e78fc3bed83c6e18be6dbc8eb86a8db0898Samuel Iglesias Gonsálvez const fs_builder ubld = bld.exec_all().group(1, 0); 462594135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_DF, 1); 462694135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez ubld.DIM(dst, brw_imm_df(v)); 462794135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez return component(dst, 0); 462894135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez } 462994135e8736f2741684e978afac9d34c368f7bcb1Samuel Iglesias Gonsálvez 46309e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga /* gen7 does not support DF immediates, so we generate a 64-bit constant by 46319e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * writing the low 32-bit of the constant to suboffset 0 of a VGRF and 
46329e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * the high 32-bit to suboffset 4 and then applying a stride of 0. 46339e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * 46349e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * Alternatively, we could also produce a normal VGRF (without stride 0) 46359e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * by writing to all the channels in the VGRF, however, that would hit the 46369e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * gen7 bug where we have to split writes that span more than 1 register 46379e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * into instructions with a width of 4 (otherwise the write to the second 46389e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * register written runs into an execmask hardware bug) which isn't very 46399e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga * nice. 46409e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga */ 46419e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga union { 46429e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga double d; 46439e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga struct { 46449e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga uint32_t i1; 46459e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga uint32_t i2; 46469e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga }; 46479e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga } di; 46489e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga 46499e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga di.d = v; 46509e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga 46519e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga const fs_builder ubld = bld.exec_all().group(1, 0); 46529e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); 46539e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral 
Quiroga ubld.MOV(tmp, brw_imm_ud(di.i1)); 46549e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga ubld.MOV(horiz_offset(tmp, 1), brw_imm_ud(di.i2)); 46559e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga 46569e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga return component(retype(tmp, BRW_REGISTER_TYPE_DF), 0); 46579e196e907ee87bff2b8c215df5e31a0cd1d1a322Iago Toral Quiroga} 4658