1a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand/* 2a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Copyright © 2014 Intel Corporation 3a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * 4a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Permission is hereby granted, free of charge, to any person obtaining a 5a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * copy of this software and associated documentation files (the "Software"), 6a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * to deal in the Software without restriction, including without limitation 7a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * and/or sell copies of the Software, and to permit persons to whom the 9a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Software is furnished to do so, subject to the following conditions: 10a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * 11a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * The above copyright notice and this permission notice (including the next 12a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * paragraph) shall be included in all copies or substantial portions of the 13a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Software. 14a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * 15a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * IN THE SOFTWARE. 22a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * 23a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Authors: 24a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Jason Ekstrand (jason@jlekstrand.net) 25a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * 26a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand */ 27a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2894ff35204dba0ddbd7f5c4342206c8acba22d32fEduardo Lima Mitev#include "brw_nir.h" 29c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand#include "compiler/nir/nir_builder.h" 30a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 31a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand/* 32a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * Implements a small peephole optimization that looks for a multiply that 33a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * is only ever used in an add and replaces both with an fma. 34a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand */ 35a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 367f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrandstatic inline bool 377f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrandare_all_uses_fadd(nir_ssa_def *def) 387f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand{ 39f752effa087f29faddabac047683d16416d178d9Jason Ekstrand if (!list_empty(&def->if_uses)) 407f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand return false; 417f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 425015260a0598ada4865b2c6086fc669505f64749Jason Ekstrand nir_foreach_use(use_src, def) { 43f752effa087f29faddabac047683d16416d178d9Jason Ekstrand nir_instr *use_instr = use_src->parent_instr; 447f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 457f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand if (use_instr->type != nir_instr_type_alu) 467f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand return false; 477f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 487f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand nir_alu_instr *use_alu = nir_instr_as_alu(use_instr); 497f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand switch (use_alu->op) { 507f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand case nir_op_fadd: 517f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand break; /* This one's ok */ 527f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 537f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand case nir_op_imov: 547f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand case nir_op_fmov: 557f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand case nir_op_fneg: 567f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand case nir_op_fabs: 577f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand assert(use_alu->dest.dest.is_ssa); 587f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand if (!are_all_uses_fadd(&use_alu->dest.dest.ssa)) 597f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand return false; 607f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand break; 617f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 627f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand default: 637f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand return false; 647f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand } 657f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand } 667f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 677f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand return true; 687f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand} 697f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand 70a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrandstatic nir_alu_instr * 712231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quirogaget_mul_for_src(nir_alu_src *src, int num_components, 722231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga uint8_t swizzle[4], bool *negate, bool *abs) 73a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand{ 74418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez uint8_t swizzle_tmp[4]; 75a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand assert(src->src.is_ssa && !src->abs && !src->negate); 76a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 77a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_instr *instr = src->src.ssa->parent_instr; 78a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (instr->type != nir_instr_type_alu) 79a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand return NULL; 80a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 81a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_alu_instr *alu = nir_instr_as_alu(instr); 8288ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand 8388ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand /* We want to bail if any of the other ALU operations involved is labled 8488ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand * exact. One reason for this is that, while the value that is changing is 8588ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand * actually the result of the add and not the multiply, the intention of 8688ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand * the user when they specify an exact multiply is that they want *that* 8788ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand * value and what they don't care about is the add. Another reason is that 8888ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand * SPIR-V explicitly requires this behaviour. 8988ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand */ 9088ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand if (alu->exact) 9188ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand return NULL; 9288ef2476dcdd61000cbae7ded9c8fa52927429d8Jason Ekstrand 93a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand switch (alu->op) { 94a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand case nir_op_imov: 95a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand case nir_op_fmov: 962231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); 97a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand break; 98a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 99a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand case nir_op_fneg: 1002231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); 101a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand *negate = !*negate; 102a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand break; 103a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 104a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand case nir_op_fabs: 1052231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); 106a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand *negate = false; 107a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand *abs = true; 108a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand break; 109a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 110a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand case nir_op_fmul: 11160a27ad122128145d28be37e9c0b0bc86a8e5181Giuseppe Bilotta /* Only absorb a fmul into a ffma if the fmul is only used in fadd 112ca3b4d6d17a0f95b287e87888c9d893be94f0301Jason Ekstrand * operations. This prevents us from being too aggressive with our 1137f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand * fusing which can actually lead to more instructions. 1147f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand */ 1157f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand if (!are_all_uses_fadd(&alu->dest.dest.ssa)) 1167f344721b1a94a6166b53f959ff6b159af3b5f9aJason Ekstrand return NULL; 117a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand break; 118a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 119a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand default: 120a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand return NULL; 121a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand } 122a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 123a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (!alu) 124a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand return NULL; 125a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 126418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez /* Copy swizzle data before overwriting it to avoid setting a wrong swizzle. 127418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * 128418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * Example: 129418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * Former swizzle[] = xyzw 130418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * src->swizzle[] = zyxx 131418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * 132418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * Expected output swizzle = zyxx 133418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez * If we reuse swizzle in the loop, then output swizzle would be zyzz. 134418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez */ 135418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez memcpy(swizzle_tmp, swizzle, 4*sizeof(uint8_t)); 13632cdb49fe2f0211040bfb16e668169097199bfccRhys Kidd for (int i = 0; i < num_components; i++) 137418c004f802e63ca4e9f3456a46498d2fc543854Samuel Iglesias Gonsálvez swizzle[i] = swizzle_tmp[src->swizzle[i]]; 138a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 139a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand return alu; 140a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand} 141a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 142de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev/** 143de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev * Given a list of (at least two) nir_alu_src's, tells if any of them is a 144de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev * constant value and is used only once. 145de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev */ 146de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitevstatic bool 147de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitevany_alu_src_is_a_constant(nir_alu_src srcs[]) 148de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev{ 149de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev for (unsigned i = 0; i < 2; i++) { 150de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev if (srcs[i].src.ssa->parent_instr->type == nir_instr_type_load_const) { 151de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev nir_load_const_instr *load_const = 152de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev nir_instr_as_load_const (srcs[i].src.ssa->parent_instr); 153de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev 154de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev if (list_is_singular(&load_const->def.uses) && 155de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev list_empty(&load_const->def.if_uses)) { 156de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev return true; 157de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev } 158de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev } 159de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev } 160de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev 161de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev return false; 162de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev} 163de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev 164a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrandstatic bool 165c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrandbrw_nir_opt_peephole_ffma_block(nir_builder *b, nir_block *block) 166a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand{ 1677efff10585122d484dc3adab14af9380b9b8f309Connor Abbott bool progress = false; 168a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 169707e72f13bb78869ee95d3286980bf1709cba6cfJason Ekstrand nir_foreach_instr_safe(instr, block) { 170a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (instr->type != nir_instr_type_alu) 171a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand continue; 172a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 173a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_alu_instr *add = nir_instr_as_alu(instr); 174a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (add->op != nir_op_fadd) 175a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand continue; 176a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 177865e83b9ec86c9ccec7100dfae9f80ff2969753cJason Ekstrand assert(add->dest.dest.is_ssa); 178865e83b9ec86c9ccec7100dfae9f80ff2969753cJason Ekstrand if (add->exact) 179865e83b9ec86c9ccec7100dfae9f80ff2969753cJason Ekstrand continue; 180a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 181a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa); 182a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 183a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand /* This, is the case a + a. We would rather handle this with an 184a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * algebraic reduction than fuse it. Also, we want to only fuse 185a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * things where the multiply is used only once and, in this case, 186a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand * it would be used twice by the same instruction. 187a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand */ 188a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (add->src[0].src.ssa == add->src[1].src.ssa) 189a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand continue; 190a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 191a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_alu_instr *mul; 192a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand uint8_t add_mul_src, swizzle[4]; 193a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand bool negate, abs; 194a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) { 195a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand for (unsigned i = 0; i < 4; i++) 196a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand swizzle[i] = i; 197a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 198a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand negate = false; 199a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand abs = false; 200a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2012231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga mul = get_mul_for_src(&add->src[add_mul_src], 2022231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga add->dest.dest.ssa.num_components, 2032231cf0ba3a79d9abb08065e0f72811c5eea807fIago Toral Quiroga swizzle, &negate, &abs); 204a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 205a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (mul != NULL) 206a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand break; 207a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand } 208a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 209a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (mul == NULL) 210a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand continue; 211a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2123124ce699bb3844e793f00e00bfbea5c91744f90Connor Abbott unsigned bit_size = add->dest.dest.ssa.bit_size; 2133124ce699bb3844e793f00e00bfbea5c91744f90Connor Abbott 214a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_ssa_def *mul_src[2]; 215a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand mul_src[0] = mul->src[0].src.ssa; 216a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand mul_src[1] = mul->src[1].src.ssa; 217a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 218de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev /* If any of the operands of the fmul and any of the fadd is a constant, 219de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev * we bypass because it will be more efficient as the constants will be 220de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev * propagated as operands, potentially saving two load_const instructions. 221de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev */ 222de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev if (any_alu_src_is_a_constant(mul->src) && 223de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev any_alu_src_is_a_constant(add->src)) { 224de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev continue; 225de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev } 226de51676b410ff3ccab1df765f8eee15126c9de4cEduardo Lima Mitev 227c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand b->cursor = nir_before_instr(&add->instr); 228c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand 229a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand if (abs) { 230c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand for (unsigned i = 0; i < 2; i++) 231c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand mul_src[i] = nir_fabs(b, mul_src[i]); 232a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand } 233a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 234c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand if (negate) 235c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand mul_src[0] = nir_fneg(b, mul_src[0]); 236a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 237c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma); 238a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand ffma->dest.saturate = add->dest.saturate; 239a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand ffma->dest.write_mask = add->dest.write_mask; 240a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 241a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand for (unsigned i = 0; i < 2; i++) { 242a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand ffma->src[i].src = nir_src_for_ssa(mul_src[i]); 243a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++) 244a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]]; 245a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand } 2468c8fc5f8336c8c79e5890265ae6c03271aa94075Jason Ekstrand nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma); 247a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 248a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand assert(add->dest.dest.is_ssa); 249a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 250a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest, 251a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand add->dest.dest.ssa.num_components, 2523124ce699bb3844e793f00e00bfbea5c91744f90Connor Abbott bit_size, 253a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand add->dest.dest.ssa.name); 254a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, 255a4aa25be1e0a27b1a6a6b0bcf576beb9dfe1ea7aJason Ekstrand nir_src_for_ssa(&ffma->dest.dest.ssa)); 256a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 257c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand nir_builder_instr_insert(b, &ffma->instr); 258f752effa087f29faddabac047683d16416d178d9Jason Ekstrand assert(list_empty(&add->dest.dest.ssa.uses)); 259a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_instr_remove(&add->instr); 260a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2617efff10585122d484dc3adab14af9380b9b8f309Connor Abbott progress = true; 262a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand } 263a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2647efff10585122d484dc3adab14af9380b9b8f309Connor Abbott return progress; 265a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand} 266a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 267a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrandstatic bool 26894ff35204dba0ddbd7f5c4342206c8acba22d32fEduardo Lima Mitevbrw_nir_opt_peephole_ffma_impl(nir_function_impl *impl) 269a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand{ 2707efff10585122d484dc3adab14af9380b9b8f309Connor Abbott bool progress = false; 271c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand 272c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand nir_builder builder; 273c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand nir_builder_init(&builder, impl); 274a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2757efff10585122d484dc3adab14af9380b9b8f309Connor Abbott nir_foreach_block(block, impl) { 276c2799a80c5d7835d9c79ef8f7dcd6da0ca89ceb6Jason Ekstrand progress |= brw_nir_opt_peephole_ffma_block(&builder, block); 2777efff10585122d484dc3adab14af9380b9b8f309Connor Abbott } 278a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2797efff10585122d484dc3adab14af9380b9b8f309Connor Abbott if (progress) 280a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_metadata_preserve(impl, nir_metadata_block_index | 281a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand nir_metadata_dominance); 282a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2837efff10585122d484dc3adab14af9380b9b8f309Connor Abbott return progress; 284a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand} 285a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 286a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrandbool 28794ff35204dba0ddbd7f5c4342206c8acba22d32fEduardo Lima Mitevbrw_nir_opt_peephole_ffma(nir_shader *shader) 288a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand{ 289a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand bool progress = false; 290a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 2919464d8c49813aba77285e7465b96e92a91ed327cJason Ekstrand nir_foreach_function(function, shader) { 292237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand if (function->impl) 293237f2f2d8b45d9d956102eec6f9be63193e5269bJason Ekstrand progress |= brw_nir_opt_peephole_ffma_impl(function->impl); 294a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand } 295a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand 296a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand return progress; 297a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66Jason Ekstrand} 298