st_glsl_to_tgsi.cpp revision e16b0a51be7866f3856b62b295df2bcf49e02384
1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov/* 2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Copyright © 2010 Intel Corporation 5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Copyright © 2011 Bryan Cain 6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * 7ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Permission is hereby granted, free of charge, to any person obtaining a 8ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * copy of this software and associated documentation files (the "Software"), 9ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * to deal in the Software without restriction, including without limitation 104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the rights to use, copy, modify, merge, publish, distribute, sublicense, 114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * and/or sell copies of the Software, and to permit persons to whom the 124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Software is furnished to do so, subject to the following conditions: 134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * The above copyright notice and this permission notice (including the next 154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * paragraph) shall be included in all copies or substantial portions of the 164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Software. 174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * DEALINGS IN THE SOFTWARE. 254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/** 284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \file glsl_to_tgsi.cpp 294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Translate GLSL IR to TGSI. 314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include <stdio.h> 34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "main/compiler.h" 35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ir.h" 36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ir_visitor.h" 374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "ir_print_visitor.h" 38ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ir_expression_flattening.h" 39ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "glsl_types.h" 404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann#include "glsl_parser_extras.h" 414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "../glsl/program.h" 424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "ir_optimization.h" 43ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ast.h" 444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 45ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "main/mtypes.h" 46ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "main/shaderobj.h" 475ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/hash_table.h" 485ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 495ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannextern "C" { 505ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "main/shaderapi.h" 515ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "main/uniforms.h" 525ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_instruction.h" 535ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_optimize.h" 545ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_print.h" 555ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/program.h" 565ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_parameter.h" 575ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/sampler.h" 585ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 595ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_compiler.h" 605ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_context.h" 615ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_screen.h" 625ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_shader_tokens.h" 634d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann#include "pipe/p_state.h" 644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "util/u_math.h" 654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "tgsi/tgsi_ureg.h" 66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "tgsi/tgsi_info.h" 674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "st_context.h" 68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "st_program.h" 694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "st_glsl_to_tgsi.h" 704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "st_mesa_to_tgsi.h" 71ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 754d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann (1 << PROGRAM_ENV_PARAM) | \ 764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann (1 << PROGRAM_STATE_VAR) | \ 774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann (1 << PROGRAM_NAMED_PARAM) | \ 784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann (1 << PROGRAM_CONSTANT) | \ 794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann (1 << PROGRAM_UNIFORM)) 804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/** 824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Maximum number of temporary registers. 834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * It is too big for stack allocated arrays -- it will cause stack overflow on 854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Windows and likely Mac OS X. 864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann */ 874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#define MAX_TEMPS 4096 884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/* will be 4 for GLSL 4.00 */ 904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#define MAX_GLSL_TEXTURE_OFFSET 1 914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass st_src_reg; 934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass st_dst_reg; 944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic int swizzle_for_size(int size); 964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/** 984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * This struct is a corresponding struct to TGSI ureg_src. 994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 100ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass st_src_reg { 1014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic: 1024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg(gl_register_file file, int index, const glsl_type *type) 1034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann { 1044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = file; 1054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = index; 1064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 1074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->swizzle = swizzle_for_size(type->vector_elements); 1084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann else 1094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->swizzle = SWIZZLE_XYZW; 1104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann this->negate = 0; 1114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = type ? type->base_type : GLSL_TYPE_ERROR; 1124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = NULL; 1135ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann } 1144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg(gl_register_file file, int index, int type) 1164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann { 1174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = type; 1184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = file; 1194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = index; 1204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->swizzle = SWIZZLE_XYZW; 1214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->negate = 0; 1224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = NULL; 1234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 1244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg() 1264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann { 1274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = GLSL_TYPE_ERROR; 1284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = PROGRAM_UNDEFINED; 1294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = 0; 1304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->swizzle = 0; 1314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->negate = 0; 1324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = NULL; 1334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 134ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 135ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann explicit st_src_reg(st_dst_reg reg); 1364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann gl_register_file file; /**< PROGRAM_* from Mesa */ 1384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 1394d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 1404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int negate; /**< NEGATE_XYZW mask from mesa */ 1414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 1424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /** Register index should be offset by the integer in this reg. */ 1434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg *reladdr; 1444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}; 1454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass st_dst_reg { 1474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic: 1484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg(gl_register_file file, int writemask, int type) 1494d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann { 1504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = file; 1514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = 0; 1524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->writemask = writemask; 1534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->cond_mask = COND_TR; 1544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = NULL; 1554d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = type; 1564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann } 157ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 158ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg() 1594d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann { 1604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = GLSL_TYPE_ERROR; 1614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = PROGRAM_UNDEFINED; 1624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = 0; 1634d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->writemask = 0; 1644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->cond_mask = COND_TR; 1654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = NULL; 1664d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 1674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann explicit st_dst_reg(st_src_reg reg); 1694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann gl_register_file file; /**< PROGRAM_* from Mesa */ 1714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 1724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 173ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann GLuint cond_mask:4; 174ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 175ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** Register index should be offset by the integer in this reg. */ 1764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg *reladdr; 1774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}; 1784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmannst_src_reg::st_src_reg(st_dst_reg reg) 1804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 1814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = reg.type; 1824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = reg.file; 1834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = reg.index; 1844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->swizzle = SWIZZLE_XYZW; 1854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->negate = 0; 1864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = reg.reladdr; 1874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 1884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannst_dst_reg::st_dst_reg(st_src_reg reg) 1904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 1914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->type = reg.type; 1924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->file = reg.file; 1934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->index = reg.index; 1944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->writemask = WRITEMASK_XYZW; 1954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->cond_mask = COND_TR; 1964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->reladdr = reg.reladdr; 1974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 1984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass glsl_to_tgsi_instruction : public exec_node { 2004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic: 201ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Callers of this ralloc-based new need not call delete. It's 202ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 203ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann static void* operator new(size_t size, void *ctx) 204ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann { 205ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void *node; 2064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 207ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann node = rzalloc_size(ctx, size); 208ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(node != NULL); 209ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 210ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return node; 211ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 212ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 213ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann unsigned op; 214ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst; 215e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov st_src_reg src[3]; 216e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov /** Pointer to the ir source this tree came from for debugging */ 2174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann ir_instruction *ir; 218ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann GLboolean cond_update; 219ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool saturate; 220ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int sampler; /**< sampler index */ 2214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int tex_target; /**< One of TEXTURE_*_INDEX */ 2224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann GLboolean tex_shadow; 2234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; 2244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann unsigned tex_offset_num_offset; 2254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann int dead_mask; /**< Used in dead code elimination */ 2264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 2274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 228ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}; 2294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 230ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass variable_storage : public exec_node { 231ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannpublic: 232ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann variable_storage(ir_variable *var, gl_register_file file, int index) 233ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann : file(file), index(index), var(var) 2344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann { 235ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* empty */ 2364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 237ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 238ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann gl_register_file file; 239ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int index; 240ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_variable *var; /* variable that maps to this, if any */ 241ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}; 242ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 243ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass immediate_storage : public exec_node { 244ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannpublic: 245ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann immediate_storage(gl_constant_value *values, int size, int type) 246ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann { 247ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann memcpy(this->values, values, size * sizeof(gl_constant_value)); 248ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->size = size; 249ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->type = type; 250ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 251ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 252ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann gl_constant_value values[4]; 253ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int size; /**< Number of components (1-4) */ 254ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 255ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}; 256ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 257ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass function_entry : public exec_node { 2584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic: 2594d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann ir_function_signature *sig; 2604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 261ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** 2624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * identifier of this function signature used by the program. 263ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * 264ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * At the point that TGSI instructions for function calls are 265ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * generated, we don't know the address of the first instruction of 266ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * the function body. So we make the BranchTarget that is called a 2674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * small integer and rewrite them during set_branchtargets(). 268ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann */ 2694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int sig_id; 270ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 271ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** 2724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Pointer to first instruction of the function body. 273ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * 2744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Set during function body emits after main() is processed. 275ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 276ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann glsl_to_tgsi_instruction *bgn_inst; 277ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 278ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** 279ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Index of the first instruction of the function body in actual TGSI. 2804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 2814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Set after conversion from glsl_to_tgsi_instruction to TGSI. 2824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 2834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int inst; 2844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 2854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /** Storage for the return value. */ 2864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg return_reg; 2874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}; 2884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 2894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass glsl_to_tgsi_visitor : public ir_visitor { 2904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic: 2914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann glsl_to_tgsi_visitor(); 2924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann ~glsl_to_tgsi_visitor(); 2934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 2944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann function_entry *current_function; 295ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 2964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann struct gl_context *ctx; 2974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann struct gl_program *prog; 2984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann struct gl_shader_program *shader_program; 299ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann struct gl_shader_compiler_options *options; 300ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 301ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int next_temp; 3024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 303ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int num_address_regs; 3044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int samplers_used; 3054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann bool indirect_addr_temps; 3064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann bool indirect_addr_consts; 3074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int num_clip_distances; 3084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 3094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int glsl_version; 310ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool native_integers; 311ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 3124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann variable_storage *find_variable_storage(ir_variable *var); 3134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 3144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int add_constant(gl_register_file file, gl_constant_value values[4], 3154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann int size, int datatype, GLuint *swizzle_out); 3164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 317ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann function_entry *get_function_signature(ir_function_signature *sig); 3184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 319ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg get_temp(const glsl_type *type); 3204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 3214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 322ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg st_src_reg_for_float(float val); 323ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg st_src_reg_for_int(int val); 3244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg st_src_reg_for_type(int type, int val); 3254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 3264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /** 3274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \name Visit methods 3284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 3294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * As typical for the visitor pattern, there must be one \c visit method for 3304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * each concrete subclass of \c ir_instruction. Virtual base classes within 3314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the hierarchy should not have \c visit methods. 3324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 3334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /*@{*/ 3344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann virtual void visit(ir_variable *); 3354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann virtual void visit(ir_loop *); 336ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann virtual void visit(ir_loop_jump *); 3374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann virtual void visit(ir_function_signature *); 338e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov virtual void visit(ir_function *); 339ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_expression *); 340ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_swizzle *); 341ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_dereference_variable *); 342ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_dereference_array *); 343ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_dereference_record *); 344ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_assignment *); 345ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_constant *); 346ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_call *); 347ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_return *); 348ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_discard *); 349ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_texture *); 350ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual void visit(ir_if *); 351e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov /*@}*/ 352ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 353ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg result; 354ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 355ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** List of variable_storage */ 356ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann exec_list variables; 3574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 358ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann /** List of immediate_storage */ 359ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann exec_list immediates; 360e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov unsigned num_immediates; 361ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 362ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** List of function_entry */ 363ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann exec_list function_signatures; 364ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int next_signature_id; 3654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 3664d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /** List of glsl_to_tgsi_instruction */ 3674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann exec_list instructions; 3684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 3694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 370ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 371ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 3724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg dst, st_src_reg src0); 3734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 374ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 375ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst, st_src_reg src0, st_src_reg src1); 376ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 377ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 378ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst, 379ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg src0, st_src_reg src1, st_src_reg src2); 380ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann 381ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann unsigned get_opcode(ir_instruction *ir, unsigned op, 3824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg dst, 3835ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_src_reg src0, st_src_reg src1); 3845ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 3855ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann /** 3865ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann * Emit the correct dot-product instruction for the type of arguments 3875ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann */ 3885ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 3895ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_dst_reg dst, 3905ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_src_reg src0, 3915ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_src_reg src1, 3925ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann unsigned elements); 3935ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 3945ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann void emit_scalar(ir_instruction *ir, unsigned op, 3955ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_dst_reg dst, st_src_reg src0); 3965ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 3975ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann void emit_scalar(ir_instruction *ir, unsigned op, 3985ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_dst_reg dst, st_src_reg src0, st_src_reg src1); 3995ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 4005ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); 4015ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 4025ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. 
Moltmann void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 4034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 4044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void emit_scs(ir_instruction *ir, unsigned op, 4054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg dst, const st_src_reg &src); 406ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 407ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool try_emit_mad(ir_expression *ir, 408ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int mul_operand); 409ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool try_emit_mad_for_and_not(ir_expression *ir, 410ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int mul_operand); 411ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool try_emit_sat(ir_expression *ir); 4124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 4134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void emit_swz(ir_expression *ir); 4144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 415ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool process_move_condition(ir_rvalue *ir); 4164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 4174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void simplify_cmp(void); 418ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 4194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void rename_temp_register(int index, int new_index); 420ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int get_first_temp_read(int index); 421ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int get_first_temp_write(int index); 422ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int get_last_temp_read(int index); 423ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int get_last_temp_write(int index); 424ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann 4254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void copy_propagate(void); 4264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann void eliminate_dead_code(void); 427e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int eliminate_dead_code_advanced(void); 428e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov void merge_registers(void); 429e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov void renumber_registers(void); 430e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 431ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void *mem_ctx; 432ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}; 433e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 434e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 435ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 436ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 437ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 438ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 439ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 4404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic void 4414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannfail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 442ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 4434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic void 4444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannfail_link(struct gl_shader_program *prog, const char *fmt, ...) 4454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 4464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann va_list args; 4474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann va_start(args, fmt); 448ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 449ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann va_end(args); 4504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 451ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann prog->LinkStatus = GL_FALSE; 452ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 4534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 454ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic int 455ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannswizzle_for_size(int size) 4564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 457ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int size_swizzles[4] = { 458ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 459ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 460ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 461ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 462ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann }; 463ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 464ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert((size >= 1) && (size <= 4)); 465ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return size_swizzles[size - 1]; 466ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 467ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 4684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic bool 4694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannis_tex_instruction(unsigned opcode) 4704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann{ 4714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 4724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return info->is_tex; 4734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 4744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 4754d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic unsigned 4764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannnum_inst_dst_regs(unsigned opcode) 4774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 4784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 4794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return info->num_dst; 4804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 4814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 4824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic unsigned 4834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannnum_inst_src_regs(unsigned opcode) 4844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 4854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 4864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return info->is_tex ? info->num_src - 1 : info->num_src; 4874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 488ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 489ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_instruction * 490ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 491ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst, 4924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg src0, st_src_reg src1, st_src_reg src2) 4934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann{ 4944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 495ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int num_reladdr = 0, i; 496ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 4974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann op = get_opcode(ir, op, dst, src0, src1); 498ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 499ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* If we have to do relative addressing, we want to load the ARL 5004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * reg directly for one of the regs, and preload the other reladdr 5014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * sources into temps. 5024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 5034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann num_reladdr += dst.reladdr != NULL; 5044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann num_reladdr += src0.reladdr != NULL; 5054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann num_reladdr += src1.reladdr != NULL; 506ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann num_reladdr += src2.reladdr != NULL; 5074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 508e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov reladdr_to_temp(ir, &src2, &num_reladdr); 509e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov reladdr_to_temp(ir, &src1, &num_reladdr); 510ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann reladdr_to_temp(ir, &src0, &num_reladdr); 511ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 512ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (dst.reladdr) { 513ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit_arl(ir, address_reg, *dst.reladdr); 5144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann num_reladdr--; 515ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann } 516ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(num_reladdr == 0); 5174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 518ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann inst->op = op; 5194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst->dst = dst; 520ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann inst->src[0] = src0; 521ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann inst->src[1] = src1; 522ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann inst->src[2] = src2; 5234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst->ir = ir; 524ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann inst->dead_mask = 0; 5254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 526ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann inst->function = NULL; 5274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 528ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) 5294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->num_address_regs = 1; 530ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 5314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* Update indirect addressing status used by TGSI */ 532ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (dst.reladdr) { 5334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann switch(dst.file) { 5344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case PROGRAM_TEMPORARY: 5354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->indirect_addr_temps = true; 536ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 537ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_LOCAL_PARAM: 538ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_ENV_PARAM: 5394d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann case PROGRAM_STATE_VAR: 540ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_NAMED_PARAM: 541ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_CONSTANT: 542ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_UNIFORM: 5434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->indirect_addr_consts = true; 544ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 5454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case PROGRAM_IMMEDIATE: 5464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann assert(!"immediates should not have indirect addressing"); 5474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann break; 548ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann default: 549ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 5504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 5514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 5524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann else { 5534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann for (i=0; i<3; i++) { 5544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if(inst->src[i].reladdr) { 555ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann switch(inst->src[i].file) { 5564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case PROGRAM_TEMPORARY: 5574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->indirect_addr_temps = true; 5584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann break; 559ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_LOCAL_PARAM: 5604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case PROGRAM_ENV_PARAM: 561ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_STATE_VAR: 562ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_NAMED_PARAM: 563ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann case PROGRAM_CONSTANT: 5644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case PROGRAM_UNIFORM: 565ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->indirect_addr_consts = true; 566ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 567ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case PROGRAM_IMMEDIATE: 568e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov assert(!"immediates should not have indirect addressing"); 569ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 570ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann default: 571ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 572ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 5734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 5744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 575ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 5764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 5774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->instructions.push_tail(inst); 5784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 5794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (native_integers) 5804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann try_emit_float_set(ir, op, dst); 5814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 5824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return inst; 583ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 5844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 5854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 586ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_instruction * 587ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 588ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann st_dst_reg dst, st_src_reg src0, st_src_reg src1) 589ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 590ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return emit(ir, op, dst, src0, src1, undef_src); 5914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 5924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 5934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_instruction * 594ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 5954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg dst, st_src_reg src0) 5964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 5974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann assert(dst.writemask != 0); 5984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return emit(ir, op, dst, src0, undef_src, undef_src); 5994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 6004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 6014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_instruction * 602ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 6034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 6044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 605ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 606ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 607ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /** 608ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Emits the code to convert the result of float SET instructions to integers. 609ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 6104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannvoid 611ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmannglsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, 612ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst) 613ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 614ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if ((op == TGSI_OPCODE_SEQ || 6154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann op == TGSI_OPCODE_SNE || 616ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann op == TGSI_OPCODE_SGE || 617ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann op == TGSI_OPCODE_SLT)) 618ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann { 6194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg src = st_src_reg(dst); 620ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.negate = ~src.negate; 621ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann dst.type = GLSL_TYPE_FLOAT; 6224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann emit(ir, TGSI_OPCODE_F2I, dst, src); 623ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 624ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 6254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 626ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/** 627ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Determines whether to use an integer, unsigned integer, or float opcode 628ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * based on the operands and input opcode, then emits the result. 6294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 6304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannunsigned 631ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 632ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst, 633ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann st_src_reg src0, st_src_reg src1) 6344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 635ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int type = GLSL_TYPE_FLOAT; 6364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 637ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 6384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann type = GLSL_TYPE_FLOAT; 639ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann else if (native_integers) 640ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; 641ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 642ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case4(c, f, i, u) \ 6434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case TGSI_OPCODE_##c: \ 644ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 645ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 646ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann else op = TGSI_OPCODE_##f; \ 6474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann break; 648ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case3(f, i, u) case4(f, f, i, u) 649ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case2fi(f, i) case4(f, f, i, i) 650ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case2iu(i, u) case4(i, LAST, i, u) 651ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 652ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann switch(op) { 653ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2fi(ADD, UADD); 654ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2fi(MUL, UMUL); 655ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann case2fi(MAD, UMAD); 656ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case3(DIV, IDIV, UDIV); 657ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case3(MAX, IMAX, UMAX); 658ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case3(MIN, IMIN, UMIN); 659ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2iu(MOD, UMOD); 660ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 661ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2fi(SEQ, USEQ); 662ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2fi(SNE, USNE); 663ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case3(SGE, ISGE, USGE); 664ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case3(SLT, ISLT, USLT); 6654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 666ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2iu(ISHR, USHR); 667ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 668ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case2fi(SSG, ISSG); 6694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann case3(ABS, IABS, IABS); 6704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 6714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann default: break; 672ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 673ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 674ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(op != TGSI_OPCODE_LAST); 675ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return op; 676ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 677ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 678ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_instruction * 679ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 680ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann st_dst_reg dst, st_src_reg src0, st_src_reg src1, 681ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann unsigned elements) 682ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 683ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann static const unsigned dot_opcodes[] = { 684ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 685ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann }; 686ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 687e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 688ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 689ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 690ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/** 691ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Emits TGSI scalar opcodes to produce unique answers across channels. 692ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * 693ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 694ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * channel determines the result across all channels. So to do a vec4 695ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * of this operation, we want to emit a scalar per source channel used 696ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * to produce dest channels. 697ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 698ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 699ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 7004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg dst, 7014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann st_src_reg orig_src0, st_src_reg orig_src1) 7024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 7034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int i, j; 7044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int done_mask = ~dst.writemask; 7054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 7064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* TGSI RCP is a scalar operation splatting results to all channels, 7074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 7084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * dst channels. 7094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 7104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann for (i = 0; i < 4; i++) { 711ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann GLuint this_mask = (1 << i); 712ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann glsl_to_tgsi_instruction *inst; 7134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg src0 = orig_src0; 7144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg src1 = orig_src1; 7154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 716ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (done_mask & this_mask) 7174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann continue; 718ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 719ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 720ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 7214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann for (j = i + 1; j < 4; j++) { 7224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* If there is another enabled component in the destination that is 7234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann * derived from the same inputs, generate its value on this pass as 7244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * well. 7254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 7264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (!(done_mask & (1 << j)) && 7274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann GET_SWZ(src0.swizzle, j) == src0_swiz && 7284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann GET_SWZ(src1.swizzle, j) == src1_swiz) { 7294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this_mask |= (1 << j); 7304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 7314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 7324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 7334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann src0_swiz, src0_swiz); 7344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 735ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src1_swiz, src1_swiz); 736ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 737e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov inst = emit(ir, op, dst, src0, src1); 7384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst->dst.writemask = this_mask; 739e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov done_mask |= this_mask; 7404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 741e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 742ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 743ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 744ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 745ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg dst, st_src_reg src0) 746ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann{ 747ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg undef = undef_src; 748ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 749ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann undef.swizzle = SWIZZLE_XXXX; 750ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 751ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit_scalar(ir, op, dst, src0, undef); 7524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 753ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 754ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 755ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 756e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov st_dst_reg dst, st_src_reg src0) 7574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 7584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int op = TGSI_OPCODE_ARL; 7594d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 7604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) 7614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann op = TGSI_OPCODE_UARL; 7624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 763e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov emit(NULL, op, dst, src0); 7644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 7654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 7664d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/** 7674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Emit an TGSI_OPCODE_SCS instruction 7684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * 7694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 
770e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Instead of splatting its result across all four components of the 7714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * destination, it writes one value to the \c x component and another value to 7724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the \c y component. 773e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * 7744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param ir IR instruction being processed 7754d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 7764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * on which value is desired. 7774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param dst Destination register 7784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param src Source register 7794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 7804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannvoid 781ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 7824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_dst_reg dst, 7834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann const st_src_reg &src) 7844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 7854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* Vertex programs cannot use the SCS opcode. 7864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 7874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 7884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann emit_scalar(ir, op, dst, src); 7894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return; 7904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 7914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann 7924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 7934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann const unsigned scs_mask = (1U << component); 7944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int done_mask = ~dst.writemask; 7955ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_src_reg tmp; 7965ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 7974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 7984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 7994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* If there are compnents in the destination that differ from the component 8004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * that will be written by the SCS instrution, we'll need a temporary. 8014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 8024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (scs_mask != unsigned(dst.writemask)) { 8034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann tmp = get_temp(glsl_type::vec4_type); 8044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 8054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 8064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann for (unsigned i = 0; i < 4; i++) { 8074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann unsigned this_mask = (1U << i); 8084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg src0 = src; 8094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 810ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if ((done_mask & this_mask) != 0) 811ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann continue; 8124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 8134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann /* The source swizzle specified which component of the source generates 8144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * sine / cosine for the current component in the destination. The SCS 8154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * instruction requires that this value be swizzle to the X component. 8164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Replace the current swizzle with a swizzle that puts the source in 8174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the X component. 8184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 8194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann unsigned src0_swiz = GET_SWZ(src.swizzle, i); 8204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 8214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 8224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann src0_swiz, src0_swiz); 8234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann for (unsigned j = i + 1; j < 4; j++) { 8244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* If there is another enabled component in the destination that is 8254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * derived from the same inputs, generate its value on this pass as 8264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * well. 8274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 8284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (!(done_mask & (1 << j)) && 8294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann GET_SWZ(src0.swizzle, j) == src0_swiz) { 8304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this_mask |= (1 << j); 8314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 832ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 8334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann 834ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (this_mask != scs_mask) { 835ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann glsl_to_tgsi_instruction *inst; 836ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg tmp_dst = st_dst_reg(tmp); 837ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 838ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Emit the SCS instruction. 839ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 8404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 8414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst->dst.writemask = scs_mask; 8424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 8434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* Move the result of the SCS instruction to the desired location in 8444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the destination. 8454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 8464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann tmp.swizzle = MAKE_SWIZZLE4(component, component, 8474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann component, component); 8484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 8494d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst->dst.writemask = this_mask; 8504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } else { 8514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* Emit the SCS instruction to write directly to the destination. 8524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */ 8534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 8544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann inst->dst.writemask = scs_mask; 8554d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann } 8564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 8574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann done_mask |= this_mask; 8584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 859ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 8604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 861ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannint 862ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::add_constant(gl_register_file file, 863ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann gl_constant_value values[4], int size, int datatype, 864ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann GLuint *swizzle_out) 865ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 866ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (file == PROGRAM_CONSTANT) { 867ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 868ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann size, datatype, swizzle_out); 869ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 870ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int index = 0; 871ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann immediate_storage *entry; 872ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(file == PROGRAM_IMMEDIATE); 873ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 874ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Search immediate storage to see if we already have an identical 875ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * immediate that we can use instead of adding a duplicate entry. 876ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 877ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann foreach_iter(exec_list_iterator, iter, this->immediates) { 878ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann entry = (immediate_storage *)iter.get(); 879ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 880ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (entry->size == size && 881ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann entry->type == datatype && 882ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 883ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return index; 884ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 885ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann index++; 886ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 887ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 888ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Add this immediate to the list. */ 8895ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann entry = new(mem_ctx) immediate_storage(values, size, datatype); 8905ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann this->immediates.push_tail(entry); 891ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->num_immediates++; 892ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return index; 8935ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann } 8945ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann} 8955ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 8965ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannst_src_reg 8975ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannglsl_to_tgsi_visitor::st_src_reg_for_float(float val) 8985ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann{ 899ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 900ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann union gl_constant_value uval; 901ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 902ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann uval.f = val; 903ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 904ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 905ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return src; 906ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 907ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 908ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannst_src_reg 909ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::st_src_reg_for_int(int val) 910ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 911ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 912ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann union gl_constant_value uval; 913ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 914ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(native_integers); 915ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 916ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann uval.i = val; 917ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 918ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 919ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return src; 920ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 9215ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 9225ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannst_src_reg 9235ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannglsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 9245ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. 
Moltmann{ 9255ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann if (native_integers) 9265ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 9275ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann st_src_reg_for_int(val); 928ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann else 929ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return st_src_reg_for_float(val); 930e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 931ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 932ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic int 933ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmanntype_size(const struct glsl_type *type) 9344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 9354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann unsigned int i; 9364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann int size; 9374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 938ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann switch (type->base_type) { 939e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov case GLSL_TYPE_UINT: 940e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov case GLSL_TYPE_INT: 941ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case GLSL_TYPE_FLOAT: 942ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case GLSL_TYPE_BOOL: 943ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (type->is_matrix()) { 944ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return type->matrix_columns; 945ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 946ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Regardless of size of vector, it gets a vec4. This is bad 947ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * packing for things like floats, but otherwise arrays become a 948ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann * mess. Hopefully a later pass over the code can pack scalars 949ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * down if appropriate. 950ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 951ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return 1; 952ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 953ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case GLSL_TYPE_ARRAY: 954ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(type->length > 0); 955ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return type_size(type->fields.array) * type->length; 956ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case GLSL_TYPE_STRUCT: 957ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann size = 0; 958ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann for (i = 0; i < type->length; i++) { 959ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann size += type_size(type->fields.structure[i].type); 960ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 9614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann return size; 962ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case GLSL_TYPE_SAMPLER: 963ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Samplers take up one slot in UNIFORMS[], but they're baked in 964ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * at link time. 965ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 966ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return 1; 967e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov default: 968ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(0); 969ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return 0; 970ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 971ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 972ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann 973ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/** 974ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * In the initial pass of codegen, we assign temporary numbers to 975ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * intermediate results. (not SSA -- variable assignments will reuse 9764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * storage). 977ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 9784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannst_src_reg 9794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_visitor::get_temp(const glsl_type *type) 9804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 981ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg src; 982ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 983ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 984e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov src.file = PROGRAM_TEMPORARY; 985e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov src.index = next_temp; 986ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.reladdr = NULL; 9874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann next_temp += type_size(type); 988e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 989e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (type->is_array() || type->is_record()) { 990ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.swizzle = SWIZZLE_NOOP; 9914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } else { 992e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov src.swizzle = swizzle_for_size(type->vector_elements); 993e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 994ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann src.negate = 0; 9954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 996ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann return src; 997e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 998ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 9994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannvariable_storage * 1000ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 1001e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 1002ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 10034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann variable_storage *entry; 1004ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1005e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov foreach_iter(exec_list_iterator, iter, this->variables) { 10064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann entry = (variable_storage *)iter.get(); 1007ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1008ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (entry->var == var) 1009ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return entry; 1010ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1011ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1012ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return NULL; 1013ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 10144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1015ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 1016ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_variable *ir) 1017ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 1018ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (strcmp(ir->name, "gl_FragCoord") == 0) { 1019ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 10205ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann 10215ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. 
Moltmann fp->OriginUpperLeft = ir->origin_upper_left; 10225ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann fp->PixelCenterInteger = ir->pixel_center_integer; 10235ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann } 1024e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 1025e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1026ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann unsigned int i; 1027ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const ir_state_slot *const slots = ir->state_slots; 1028ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(ir->state_slots != NULL); 1029ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1030ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* Check if this statevar's setup in the STATE file exactly 1031ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * matches how we'll want to reference it as a 10324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * struct/array/whatever. If not, then we need to move it into 1033ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * temporary storage and hope that it'll get copy-propagated 1034ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * out. 1035ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 1036ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann for (i = 0; i < ir->num_state_slots; i++) { 10374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (slots[i].swizzle != SWIZZLE_XYZW) { 10384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann break; 1039ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 10404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 1041ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1042ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann variable_storage *storage; 1043ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann st_dst_reg dst; 1044ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (i == ir->num_state_slots) { 1045ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* We'll set the index later. */ 10464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 10474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->variables.push_tail(storage); 10484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1049ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann dst = undef_dst; 1050ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 1051ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* The variable_storage constructor allocates slots based on the size 1052ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * of the type. However, this had better match the number of state 10534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * elements that we're going to copy into the new temporary. 1054ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 1055e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov assert((int) ir->num_state_slots == type_size(ir->type)); 1056ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1057ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1058ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->next_temp); 1059ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->variables.push_tail(storage); 10604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->next_temp += type_size(ir->type); 1061ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1062ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1063ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann native_integers ? 
ir->type->base_type : GLSL_TYPE_FLOAT)); 1064e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 1065ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1066ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1067ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1068ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int index = _mesa_add_state_reference(this->prog->Parameters, 1069ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann (gl_state_index *)slots[i].tokens); 1070ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1071ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (storage->file == PROGRAM_STATE_VAR) { 1072e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (storage->index == -1) { 1073ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann storage->index = index; 1074ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 1075ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(index == storage->index + (int)i); 1076ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 10774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } else { 1078ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_src_reg src(PROGRAM_STATE_VAR, index, 1079ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); 1080e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov src.swizzle = slots[i].swizzle; 1081e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov emit(ir, TGSI_OPCODE_MOV, dst, src); 1082ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /* even a float takes up a whole vec4 reg in a struct/array. */ 1083ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann dst.index++; 1084ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1085ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann } 1086e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 1087ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (storage->file == PROGRAM_TEMPORARY && 1088ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann dst.index != storage->index + (int) ir->num_state_slots) { 1089ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann fail_link(this->shader_program, 1090ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1091ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir->name, dst.index - storage->index, 1092ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann type_size(ir->type)); 1093ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1094ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1095ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 1096e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 1097e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid 10984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_loop *ir) 10994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 11004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann ir_dereference_variable *counter = NULL; 11014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 11024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (ir->counter != NULL) 11034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann counter = new(ir) ir_dereference_variable(ir->counter); 11044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 11054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (ir->from != NULL) { 11064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann assert(ir->counter != NULL); 11074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 11084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. 
Moltmann ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 11094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 11104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann a->accept(this); 11114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann delete a; 11124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 11134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 11144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann emit(NULL, TGSI_OPCODE_BGNLOOP); 11154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 11164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (ir->to) { 1117ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_expression *e = 1118ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1119ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann counter, ir->to); 1120ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_if *if_stmt = new(ir) ir_if(e); 1121ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1122ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1123ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1124ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if_stmt->then_instructions.push_tail(brk); 1125ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1126ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if_stmt->accept(this); 1127ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 11284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann delete if_stmt; 1129ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann delete e; 1130ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann delete brk; 1131ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1132ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann 1133ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann visit_exec_list(&ir->body_instructions, this); 1134ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1135ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (ir->increment) { 1136ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_expression *e = 1137ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann new(ir) ir_expression(ir_binop_add, counter->type, 1138ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann counter, ir->increment); 1139ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1140ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1141ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1142ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann a->accept(this); 1143ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann delete a; 1144ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann delete e; 1145ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1146ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1147ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit(NULL, TGSI_OPCODE_ENDLOOP); 1148ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 1149ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1150ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 1151ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 11524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 1153ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann switch (ir->mode) { 1154ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case ir_loop_jump::jump_break: 1155ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit(NULL, TGSI_OPCODE_BRK); 1156ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann break; 1157ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann case ir_loop_jump::jump_continue: 1158ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit(NULL, TGSI_OPCODE_CONT); 1159ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann break; 11604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann } 11614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 11624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann 1163ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1164ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 1165ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1166ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 1167ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(0); 1168ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann (void)ir; 1169ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 1170ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1171ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid 1172ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_function *ir) 11734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{ 1174e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov /* Ignore function bodies other than main() -- we shouldn't see calls to 1175ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * them since they should all be inlined before we get to glsl_to_tgsi. 1176ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 1177ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (strcmp(ir->name, "main") == 0) { 1178ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const ir_function_signature *sig; 1179ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann exec_list empty; 1180ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann 1181ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann sig = ir->matching_signature(&empty); 1182ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1183ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann assert(sig); 1184ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1185ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann foreach_iter(exec_list_iterator, iter, sig->body) { 1186ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_instruction *ir = (ir_instruction *)iter.get(); 1187ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1188ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir->accept(this); 1189ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 1190ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 11914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann} 1192ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1193ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannbool 1194ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1195ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 1196ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int nonmul_operand = 1 - mul_operand; 11974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg a, b, c; 1198ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann st_dst_reg result_dst; 1199ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1200ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1201ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (!expr || expr->operation != ir_binop_mul) 1202ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return false; 1203ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1204ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann expr->operands[0]->accept(this); 1205ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann a = this->result; 1206ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann expr->operands[1]->accept(this); 1207ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann b = this->result; 1208ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir->operands[nonmul_operand]->accept(this); 1209ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann c = this->result; 1210ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1211ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann this->result = get_temp(ir->type); 1212ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann result_dst = st_dst_reg(this->result); 1213ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1214ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1215ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1216ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return true; 1217ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 1218ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1219ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/** 1220ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1221ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * 1222ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1223ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * implemented using multiplication, and logical-or is implemented using 1224ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1225ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann * As result, the logical expression (a & !b) can be rewritten as: 1226ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * 12274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * - a * !b 1228ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * - a * (1 - b) 1229ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * - (a * 1) - (a * b) 1230ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * - a + -(a * b) 1231ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * - a + (a * -b) 1232ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * 1233ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * This final expression can be implemented as a single MAD(a, -b, a) 1234ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * instruction. 1235ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 1236ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannbool 1237ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1238ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 1239ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const int other_operand = 1 - try_operand; 12404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann st_src_reg a, b; 1241ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1242ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_expression *expr = ir->operands[try_operand]->as_expression(); 1243ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (!expr || expr->operation != ir_unop_logic_not) 1244ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return false; 1245ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1246e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov ir->operands[other_operand]->accept(this); 1247ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann a = this->result; 1248ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann expr->operands[0]->accept(this); 1249ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann b = this->result; 1250ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1251ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann b.negate = ~b.negate; 1252ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 12534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann this->result = get_temp(ir->type); 1254ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1255ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1256ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return true; 1257ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 1258ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1259ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannbool 1260ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1261ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{ 12624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann /* Saturates were only introduced to vertex programs in 1263ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * NV_vertex_program3, so don't give them to drivers in the VP. 1264ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */ 12654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1266ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return false; 1267ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1268ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1269ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (!sat_src) 1270ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. 
Moltmann return false; 1271ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 1272e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov sat_src->accept(this); 1273 st_src_reg src = this->result; 1274 1275 /* If we generated an expression instruction into a temporary in 1276 * processing the saturate's operand, apply the saturate to that 1277 * instruction. Otherwise, generate a MOV to do the saturate. 1278 * 1279 * Note that we have to be careful to only do this optimization if 1280 * the instruction in question was what generated src->result. For 1281 * example, ir_dereference_array might generate a MUL instruction 1282 * to create the reladdr, and return us a src reg using that 1283 * reladdr. That MUL result is not the value we're trying to 1284 * saturate. 1285 */ 1286 ir_expression *sat_src_expr = sat_src->as_expression(); 1287 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 1288 sat_src_expr->operation == ir_binop_add || 1289 sat_src_expr->operation == ir_binop_dot)) { 1290 glsl_to_tgsi_instruction *new_inst; 1291 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1292 new_inst->saturate = true; 1293 } else { 1294 this->result = get_temp(ir->type); 1295 st_dst_reg result_dst = st_dst_reg(this->result); 1296 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1297 glsl_to_tgsi_instruction *inst; 1298 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1299 inst->saturate = true; 1300 } 1301 1302 return true; 1303} 1304 1305void 1306glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1307 st_src_reg *reg, int *num_reladdr) 1308{ 1309 if (!reg->reladdr) 1310 return; 1311 1312 emit_arl(ir, address_reg, *reg->reladdr); 1313 1314 if (*num_reladdr != 1) { 1315 st_src_reg temp = get_temp(glsl_type::vec4_type); 1316 1317 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1318 *reg = temp; 1319 } 1320 1321 (*num_reladdr)--; 1322} 1323 1324void 1325glsl_to_tgsi_visitor::visit(ir_expression *ir) 1326{ 1327 unsigned 
int operand; 1328 st_src_reg op[Elements(ir->operands)]; 1329 st_src_reg result_src; 1330 st_dst_reg result_dst; 1331 1332 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1333 */ 1334 if (ir->operation == ir_binop_add) { 1335 if (try_emit_mad(ir, 1)) 1336 return; 1337 if (try_emit_mad(ir, 0)) 1338 return; 1339 } 1340 1341 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1342 */ 1343 if (ir->operation == ir_binop_logic_and) { 1344 if (try_emit_mad_for_and_not(ir, 1)) 1345 return; 1346 if (try_emit_mad_for_and_not(ir, 0)) 1347 return; 1348 } 1349 1350 if (try_emit_sat(ir)) 1351 return; 1352 1353 if (ir->operation == ir_quadop_vector) 1354 assert(!"ir_quadop_vector should have been lowered"); 1355 1356 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1357 this->result.file = PROGRAM_UNDEFINED; 1358 ir->operands[operand]->accept(this); 1359 if (this->result.file == PROGRAM_UNDEFINED) { 1360 ir_print_visitor v; 1361 printf("Failed to get tree for expression operand:\n"); 1362 ir->operands[operand]->accept(&v); 1363 exit(1); 1364 } 1365 op[operand] = this->result; 1366 1367 /* Matrix expression operands should have been broken down to vector 1368 * operations already. 1369 */ 1370 assert(!ir->operands[operand]->type->is_matrix()); 1371 } 1372 1373 int vector_elements = ir->operands[0]->type->vector_elements; 1374 if (ir->operands[1]) { 1375 vector_elements = MAX2(vector_elements, 1376 ir->operands[1]->type->vector_elements); 1377 } 1378 1379 this->result.file = PROGRAM_UNDEFINED; 1380 1381 /* Storage for our result. Ideally for an assignment we'd be using 1382 * the actual storage for the result here, instead. 1383 */ 1384 result_src = get_temp(ir->type); 1385 /* convenience for the emit functions below. */ 1386 result_dst = st_dst_reg(result_src); 1387 /* Limit writes to the channels that will be used by result_src later. 1388 * This does limit this temp's use as a temporary for multi-instruction 1389 * sequences. 
1390 */ 1391 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1392 1393 switch (ir->operation) { 1394 case ir_unop_logic_not: 1395 if (result_dst.type != GLSL_TYPE_FLOAT) 1396 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1397 else { 1398 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1399 * older GPUs implement SEQ using multiple instructions (i915 uses two 1400 * SGE instructions and a MUL instruction). Since our logic values are 1401 * 0.0 and 1.0, 1-x also implements !x. 1402 */ 1403 op[0].negate = ~op[0].negate; 1404 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1405 } 1406 break; 1407 case ir_unop_neg: 1408 if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) 1409 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1410 else { 1411 op[0].negate = ~op[0].negate; 1412 result_src = op[0]; 1413 } 1414 break; 1415 case ir_unop_abs: 1416 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1417 break; 1418 case ir_unop_sign: 1419 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1420 break; 1421 case ir_unop_rcp: 1422 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1423 break; 1424 1425 case ir_unop_exp2: 1426 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1427 break; 1428 case ir_unop_exp: 1429 case ir_unop_log: 1430 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1431 break; 1432 case ir_unop_log2: 1433 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1434 break; 1435 case ir_unop_sin: 1436 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1437 break; 1438 case ir_unop_cos: 1439 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1440 break; 1441 case ir_unop_sin_reduced: 1442 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1443 break; 1444 case ir_unop_cos_reduced: 1445 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1446 break; 1447 1448 case ir_unop_dFdx: 1449 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1450 break; 1451 case ir_unop_dFdy: 1452 
op[0].negate = ~op[0].negate; 1453 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1454 break; 1455 1456 case ir_unop_noise: { 1457 /* At some point, a motivated person could add a better 1458 * implementation of noise. Currently not even the nvidia 1459 * binary drivers do anything more than this. In any case, the 1460 * place to do this is in the GL state tracker, not the poor 1461 * driver. 1462 */ 1463 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1464 break; 1465 } 1466 1467 case ir_binop_add: 1468 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1469 break; 1470 case ir_binop_sub: 1471 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1472 break; 1473 1474 case ir_binop_mul: 1475 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1476 break; 1477 case ir_binop_div: 1478 if (result_dst.type == GLSL_TYPE_FLOAT) 1479 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1480 else 1481 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1482 break; 1483 case ir_binop_mod: 1484 if (result_dst.type == GLSL_TYPE_FLOAT) 1485 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1486 else 1487 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1488 break; 1489 1490 case ir_binop_less: 1491 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1492 break; 1493 case ir_binop_greater: 1494 emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); 1495 break; 1496 case ir_binop_lequal: 1497 emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); 1498 break; 1499 case ir_binop_gequal: 1500 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1501 break; 1502 case ir_binop_equal: 1503 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1504 break; 1505 case ir_binop_nequal: 1506 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1507 break; 1508 case ir_binop_all_equal: 1509 /* "==" operator producing a scalar boolean. 
*/ 1510 if (ir->operands[0]->type->is_vector() || 1511 ir->operands[1]->type->is_vector()) { 1512 st_src_reg temp = get_temp(native_integers ? 1513 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1514 glsl_type::vec4_type); 1515 1516 if (native_integers) { 1517 st_dst_reg temp_dst = st_dst_reg(temp); 1518 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1519 1520 emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1521 1522 /* Emit 1-3 AND operations to combine the SEQ results. */ 1523 switch (ir->operands[0]->type->vector_elements) { 1524 case 2: 1525 break; 1526 case 3: 1527 temp_dst.writemask = WRITEMASK_Y; 1528 temp1.swizzle = SWIZZLE_YYYY; 1529 temp2.swizzle = SWIZZLE_ZZZZ; 1530 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1531 break; 1532 case 4: 1533 temp_dst.writemask = WRITEMASK_X; 1534 temp1.swizzle = SWIZZLE_XXXX; 1535 temp2.swizzle = SWIZZLE_YYYY; 1536 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1537 temp_dst.writemask = WRITEMASK_Y; 1538 temp1.swizzle = SWIZZLE_ZZZZ; 1539 temp2.swizzle = SWIZZLE_WWWW; 1540 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1541 } 1542 1543 temp1.swizzle = SWIZZLE_XXXX; 1544 temp2.swizzle = SWIZZLE_YYYY; 1545 emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1546 } else { 1547 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1548 1549 /* After the dot-product, the value will be an integer on the 1550 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1551 */ 1552 emit_dp(ir, result_dst, temp, temp, vector_elements); 1553 1554 /* Negating the result of the dot-product gives values on the range 1555 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1556 * This is achieved using SGE. 
1557 */ 1558 st_src_reg sge_src = result_src; 1559 sge_src.negate = ~sge_src.negate; 1560 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1561 } 1562 } else { 1563 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1564 } 1565 break; 1566 case ir_binop_any_nequal: 1567 /* "!=" operator producing a scalar boolean. */ 1568 if (ir->operands[0]->type->is_vector() || 1569 ir->operands[1]->type->is_vector()) { 1570 st_src_reg temp = get_temp(native_integers ? 1571 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1572 glsl_type::vec4_type); 1573 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1574 1575 if (native_integers) { 1576 st_dst_reg temp_dst = st_dst_reg(temp); 1577 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1578 1579 /* Emit 1-3 OR operations to combine the SNE results. */ 1580 switch (ir->operands[0]->type->vector_elements) { 1581 case 2: 1582 break; 1583 case 3: 1584 temp_dst.writemask = WRITEMASK_Y; 1585 temp1.swizzle = SWIZZLE_YYYY; 1586 temp2.swizzle = SWIZZLE_ZZZZ; 1587 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1588 break; 1589 case 4: 1590 temp_dst.writemask = WRITEMASK_X; 1591 temp1.swizzle = SWIZZLE_XXXX; 1592 temp2.swizzle = SWIZZLE_YYYY; 1593 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1594 temp_dst.writemask = WRITEMASK_Y; 1595 temp1.swizzle = SWIZZLE_ZZZZ; 1596 temp2.swizzle = SWIZZLE_WWWW; 1597 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1598 } 1599 1600 temp1.swizzle = SWIZZLE_XXXX; 1601 temp2.swizzle = SWIZZLE_YYYY; 1602 emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1603 } else { 1604 /* After the dot-product, the value will be an integer on the 1605 * range [0,4]. Zero stays zero, and positive values become 1.0. 
1606 */ 1607 glsl_to_tgsi_instruction *const dp = 1608 emit_dp(ir, result_dst, temp, temp, vector_elements); 1609 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1610 /* The clamping to [0,1] can be done for free in the fragment 1611 * shader with a saturate. 1612 */ 1613 dp->saturate = true; 1614 } else { 1615 /* Negating the result of the dot-product gives values on the range 1616 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1617 * achieved using SLT. 1618 */ 1619 st_src_reg slt_src = result_src; 1620 slt_src.negate = ~slt_src.negate; 1621 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1622 } 1623 } 1624 } else { 1625 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1626 } 1627 break; 1628 1629 case ir_unop_any: { 1630 assert(ir->operands[0]->type->is_vector()); 1631 1632 /* After the dot-product, the value will be an integer on the 1633 * range [0,4]. Zero stays zero, and positive values become 1.0. 1634 */ 1635 glsl_to_tgsi_instruction *const dp = 1636 emit_dp(ir, result_dst, op[0], op[0], 1637 ir->operands[0]->type->vector_elements); 1638 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1639 result_dst.type == GLSL_TYPE_FLOAT) { 1640 /* The clamping to [0,1] can be done for free in the fragment 1641 * shader with a saturate. 1642 */ 1643 dp->saturate = true; 1644 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1645 /* Negating the result of the dot-product gives values on the range 1646 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1647 * is achieved using SLT. 1648 */ 1649 st_src_reg slt_src = result_src; 1650 slt_src.negate = ~slt_src.negate; 1651 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1652 } 1653 else { 1654 /* Use SNE 0 if integers are being used as boolean values. 
*/ 1655 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1656 } 1657 break; 1658 } 1659 1660 case ir_binop_logic_xor: 1661 if (native_integers) 1662 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1663 else 1664 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1665 break; 1666 1667 case ir_binop_logic_or: { 1668 if (native_integers) { 1669 /* If integers are used as booleans, we can use an actual "or" 1670 * instruction. 1671 */ 1672 assert(native_integers); 1673 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1674 } else { 1675 /* After the addition, the value will be an integer on the 1676 * range [0,2]. Zero stays zero, and positive values become 1.0. 1677 */ 1678 glsl_to_tgsi_instruction *add = 1679 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1680 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1681 /* The clamping to [0,1] can be done for free in the fragment 1682 * shader with a saturate if floats are being used as boolean values. 1683 */ 1684 add->saturate = true; 1685 } else { 1686 /* Negating the result of the addition gives values on the range 1687 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1688 * is achieved using SLT. 1689 */ 1690 st_src_reg slt_src = result_src; 1691 slt_src.negate = ~slt_src.negate; 1692 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1693 } 1694 } 1695 break; 1696 } 1697 1698 case ir_binop_logic_and: 1699 /* If native integers are disabled, the bool args are stored as float 0.0 1700 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1701 * actual AND opcode. 
1702 */ 1703 if (native_integers) 1704 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1705 else 1706 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1707 break; 1708 1709 case ir_binop_dot: 1710 assert(ir->operands[0]->type->is_vector()); 1711 assert(ir->operands[0]->type == ir->operands[1]->type); 1712 emit_dp(ir, result_dst, op[0], op[1], 1713 ir->operands[0]->type->vector_elements); 1714 break; 1715 1716 case ir_unop_sqrt: 1717 /* sqrt(x) = x * rsq(x). */ 1718 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1719 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1720 /* For incoming channels <= 0, set the result to 0. */ 1721 op[0].negate = ~op[0].negate; 1722 emit(ir, TGSI_OPCODE_CMP, result_dst, 1723 op[0], result_src, st_src_reg_for_float(0.0)); 1724 break; 1725 case ir_unop_rsq: 1726 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1727 break; 1728 case ir_unop_i2f: 1729 if (native_integers) { 1730 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1731 break; 1732 } 1733 /* fallthrough to next case otherwise */ 1734 case ir_unop_b2f: 1735 if (native_integers) { 1736 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1737 break; 1738 } 1739 /* fallthrough to next case otherwise */ 1740 case ir_unop_i2u: 1741 case ir_unop_u2i: 1742 /* Converting between signed and unsigned integers is a no-op. */ 1743 result_src = op[0]; 1744 break; 1745 case ir_unop_b2i: 1746 if (native_integers) { 1747 /* Booleans are stored as integers using ~0 for true and 0 for false. 1748 * GLSL requires that int(bool) return 1 for true and 0 for false. 1749 * This conversion is done with AND, but it could be done with NEG. 1750 */ 1751 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1752 } else { 1753 /* Booleans and integers are both stored as floats when native 1754 * integers are disabled. 
1755 */ 1756 result_src = op[0]; 1757 } 1758 break; 1759 case ir_unop_f2i: 1760 if (native_integers) 1761 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1762 else 1763 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1764 break; 1765 case ir_unop_bitcast_f2i: 1766 case ir_unop_bitcast_f2u: 1767 case ir_unop_bitcast_i2f: 1768 case ir_unop_bitcast_u2f: 1769 result_src = op[0]; 1770 break; 1771 case ir_unop_f2b: 1772 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1773 break; 1774 case ir_unop_i2b: 1775 if (native_integers) 1776 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1777 else 1778 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1779 break; 1780 case ir_unop_trunc: 1781 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1782 break; 1783 case ir_unop_ceil: 1784 emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); 1785 break; 1786 case ir_unop_floor: 1787 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1788 break; 1789 case ir_unop_round_even: 1790 emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 1791 break; 1792 case ir_unop_fract: 1793 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1794 break; 1795 1796 case ir_binop_min: 1797 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1798 break; 1799 case ir_binop_max: 1800 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1801 break; 1802 case ir_binop_pow: 1803 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1804 break; 1805 1806 case ir_unop_bit_not: 1807 if (native_integers) { 1808 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1809 break; 1810 } 1811 case ir_unop_u2f: 1812 if (native_integers) { 1813 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1814 break; 1815 } 1816 case ir_binop_lshift: 1817 if (native_integers) { 1818 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); 1819 break; 1820 } 1821 case ir_binop_rshift: 1822 if (native_integers) { 1823 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); 1824 break; 1825 } 1826 case ir_binop_bit_and: 1827 if 
(native_integers) { 1828 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1829 break; 1830 } 1831 case ir_binop_bit_xor: 1832 if (native_integers) { 1833 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1834 break; 1835 } 1836 case ir_binop_bit_or: 1837 if (native_integers) { 1838 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1839 break; 1840 } 1841 1842 assert(!"GLSL 1.30 features unsupported"); 1843 break; 1844 1845 case ir_quadop_vector: 1846 /* This operation should have already been handled. 1847 */ 1848 assert(!"Should not get here."); 1849 break; 1850 } 1851 1852 this->result = result_src; 1853} 1854 1855 1856void 1857glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1858{ 1859 st_src_reg src; 1860 int i; 1861 int swizzle[4]; 1862 1863 /* Note that this is only swizzles in expressions, not those on the left 1864 * hand side of an assignment, which do write masking. See ir_assignment 1865 * for that. 1866 */ 1867 1868 ir->val->accept(this); 1869 src = this->result; 1870 assert(src.file != PROGRAM_UNDEFINED); 1871 1872 for (i = 0; i < 4; i++) { 1873 if (i < ir->type->vector_elements) { 1874 switch (i) { 1875 case 0: 1876 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1877 break; 1878 case 1: 1879 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1880 break; 1881 case 2: 1882 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1883 break; 1884 case 3: 1885 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1886 break; 1887 } 1888 } else { 1889 /* If the type is smaller than a vec4, replicate the last 1890 * channel out. 
1891 */ 1892 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1893 } 1894 } 1895 1896 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1897 1898 this->result = src; 1899} 1900 1901void 1902glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1903{ 1904 variable_storage *entry = find_variable_storage(ir->var); 1905 ir_variable *var = ir->var; 1906 1907 if (!entry) { 1908 switch (var->mode) { 1909 case ir_var_uniform: 1910 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1911 var->location); 1912 this->variables.push_tail(entry); 1913 break; 1914 case ir_var_in: 1915 case ir_var_inout: 1916 /* The linker assigns locations for varyings and attributes, 1917 * including deprecated builtins (like gl_Color), user-assign 1918 * generic attributes (glBindVertexLocation), and 1919 * user-defined varyings. 1920 * 1921 * FINISHME: We would hit this path for function arguments. Fix! 1922 */ 1923 assert(var->location != -1); 1924 entry = new(mem_ctx) variable_storage(var, 1925 PROGRAM_INPUT, 1926 var->location); 1927 break; 1928 case ir_var_out: 1929 assert(var->location != -1); 1930 entry = new(mem_ctx) variable_storage(var, 1931 PROGRAM_OUTPUT, 1932 var->location + var->index); 1933 break; 1934 case ir_var_system_value: 1935 entry = new(mem_ctx) variable_storage(var, 1936 PROGRAM_SYSTEM_VALUE, 1937 var->location); 1938 break; 1939 case ir_var_auto: 1940 case ir_var_temporary: 1941 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1942 this->next_temp); 1943 this->variables.push_tail(entry); 1944 1945 next_temp += type_size(var->type); 1946 break; 1947 } 1948 1949 if (!entry) { 1950 printf("Failed to make storage for %s\n", var->name); 1951 exit(1); 1952 } 1953 } 1954 1955 this->result = st_src_reg(entry->file, entry->index, var->type); 1956 if (!native_integers) 1957 this->result.type = GLSL_TYPE_FLOAT; 1958} 1959 1960void 1961glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1962{ 1963 ir_constant *index; 1964 
st_src_reg src; 1965 int element_size = type_size(ir->type); 1966 1967 index = ir->array_index->constant_expression_value(); 1968 1969 ir->array->accept(this); 1970 src = this->result; 1971 1972 if (index) { 1973 src.index += index->value.i[0] * element_size; 1974 } else { 1975 /* Variable index array dereference. It eats the "vec4" of the 1976 * base of the array and an index that offsets the TGSI register 1977 * index. 1978 */ 1979 ir->array_index->accept(this); 1980 1981 st_src_reg index_reg; 1982 1983 if (element_size == 1) { 1984 index_reg = this->result; 1985 } else { 1986 index_reg = get_temp(native_integers ? 1987 glsl_type::int_type : glsl_type::float_type); 1988 1989 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1990 this->result, st_src_reg_for_type(index_reg.type, element_size)); 1991 } 1992 1993 /* If there was already a relative address register involved, add the 1994 * new and the old together to get the new offset. 1995 */ 1996 if (src.reladdr != NULL) { 1997 st_src_reg accum_reg = get_temp(native_integers ? 1998 glsl_type::int_type : glsl_type::float_type); 1999 2000 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 2001 index_reg, *src.reladdr); 2002 2003 index_reg = accum_reg; 2004 } 2005 2006 src.reladdr = ralloc(mem_ctx, st_src_reg); 2007 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2008 } 2009 2010 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 2011 if (ir->type->is_scalar() || ir->type->is_vector()) 2012 src.swizzle = swizzle_for_size(ir->type->vector_elements); 2013 else 2014 src.swizzle = SWIZZLE_NOOP; 2015 2016 this->result = src; 2017} 2018 2019void 2020glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2021{ 2022 unsigned int i; 2023 const glsl_type *struct_type = ir->record->type; 2024 int offset = 0; 2025 2026 ir->record->accept(this); 2027 2028 for (i = 0; i < struct_type->length; i++) { 2029 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 2030 break; 2031 offset += type_size(struct_type->fields.structure[i].type); 2032 } 2033 2034 /* If the type is smaller than a vec4, replicate the last channel out. */ 2035 if (ir->type->is_scalar() || ir->type->is_vector()) 2036 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2037 else 2038 this->result.swizzle = SWIZZLE_NOOP; 2039 2040 this->result.index += offset; 2041} 2042 2043/** 2044 * We want to be careful in assignment setup to hit the actual storage 2045 * instead of potentially using a temporary like we might with the 2046 * ir_dereference handler. 2047 */ 2048static st_dst_reg 2049get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 2050{ 2051 /* The LHS must be a dereference. If the LHS is a variable indexed array 2052 * access of a vector, it must be separated into a series conditional moves 2053 * before reaching this point (see ir_vec_index_to_cond_assign). 2054 */ 2055 assert(ir->as_dereference()); 2056 ir_dereference_array *deref_array = ir->as_dereference_array(); 2057 if (deref_array) { 2058 assert(!deref_array->array->type->is_vector()); 2059 } 2060 2061 /* Use the rvalue deref handler for the most part. We'll ignore 2062 * swizzles in it and write swizzles using writemask, though. 
2063 */ 2064 ir->accept(v); 2065 return st_dst_reg(v->result); 2066} 2067 2068/** 2069 * Process the condition of a conditional assignment 2070 * 2071 * Examines the condition of a conditional assignment to generate the optimal 2072 * first operand of a \c CMP instruction. If the condition is a relational 2073 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 2074 * used as the source for the \c CMP instruction. Otherwise the comparison 2075 * is processed to a boolean result, and the boolean result is used as the 2076 * operand to the CMP instruction. 2077 */ 2078bool 2079glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 2080{ 2081 ir_rvalue *src_ir = ir; 2082 bool negate = true; 2083 bool switch_order = false; 2084 2085 ir_expression *const expr = ir->as_expression(); 2086 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 2087 bool zero_on_left = false; 2088 2089 if (expr->operands[0]->is_zero()) { 2090 src_ir = expr->operands[1]; 2091 zero_on_left = true; 2092 } else if (expr->operands[1]->is_zero()) { 2093 src_ir = expr->operands[0]; 2094 zero_on_left = false; 2095 } 2096 2097 /* a is - 0 + - 0 + 2098 * (a < 0) T F F ( a < 0) T F F 2099 * (0 < a) F F T (-a < 0) F F T 2100 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 2101 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 2102 * (a > 0) F F T (-a < 0) F F T 2103 * (0 > a) T F F ( a < 0) T F F 2104 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2105 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2106 * 2107 * Note that exchanging the order of 0 and 'a' in the comparison simply 2108 * means that the value of 'a' should be negated. 
2109 */ 2110 if (src_ir != ir) { 2111 switch (expr->operation) { 2112 case ir_binop_less: 2113 switch_order = false; 2114 negate = zero_on_left; 2115 break; 2116 2117 case ir_binop_greater: 2118 switch_order = false; 2119 negate = !zero_on_left; 2120 break; 2121 2122 case ir_binop_lequal: 2123 switch_order = true; 2124 negate = !zero_on_left; 2125 break; 2126 2127 case ir_binop_gequal: 2128 switch_order = true; 2129 negate = zero_on_left; 2130 break; 2131 2132 default: 2133 /* This isn't the right kind of comparison afterall, so make sure 2134 * the whole condition is visited. 2135 */ 2136 src_ir = ir; 2137 break; 2138 } 2139 } 2140 } 2141 2142 src_ir->accept(this); 2143 2144 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 2145 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 2146 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 2147 * computing the condition. 2148 */ 2149 if (negate) 2150 this->result.negate = ~this->result.negate; 2151 2152 return switch_order; 2153} 2154 2155void 2156glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2157{ 2158 st_dst_reg l; 2159 st_src_reg r; 2160 int i; 2161 2162 ir->rhs->accept(this); 2163 r = this->result; 2164 2165 l = get_assignment_lhs(ir->lhs, this); 2166 2167 /* FINISHME: This should really set to the correct maximal writemask for each 2168 * FINISHME: component written (in the loops below). This case can only 2169 * FINISHME: occur for matrices, arrays, and structures. 2170 */ 2171 if (ir->write_mask == 0) { 2172 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2173 l.writemask = WRITEMASK_XYZW; 2174 } else if (ir->lhs->type->is_scalar() && 2175 ir->lhs->variable_referenced()->mode == ir_var_out) { 2176 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 2177 * FINISHME: W component of fragment shader output zero, work correctly. 
2178 */ 2179 l.writemask = WRITEMASK_XYZW; 2180 } else { 2181 int swizzles[4]; 2182 int first_enabled_chan = 0; 2183 int rhs_chan = 0; 2184 2185 l.writemask = ir->write_mask; 2186 2187 for (int i = 0; i < 4; i++) { 2188 if (l.writemask & (1 << i)) { 2189 first_enabled_chan = GET_SWZ(r.swizzle, i); 2190 break; 2191 } 2192 } 2193 2194 /* Swizzle a small RHS vector into the channels being written. 2195 * 2196 * glsl ir treats write_mask as dictating how many channels are 2197 * present on the RHS while TGSI treats write_mask as just 2198 * showing which channels of the vec4 RHS get written. 2199 */ 2200 for (int i = 0; i < 4; i++) { 2201 if (l.writemask & (1 << i)) 2202 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2203 else 2204 swizzles[i] = first_enabled_chan; 2205 } 2206 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2207 swizzles[2], swizzles[3]); 2208 } 2209 2210 assert(l.file != PROGRAM_UNDEFINED); 2211 assert(r.file != PROGRAM_UNDEFINED); 2212 2213 if (ir->condition) { 2214 const bool switch_order = this->process_move_condition(ir->condition); 2215 st_src_reg condition = this->result; 2216 2217 for (i = 0; i < type_size(ir->lhs->type); i++) { 2218 st_src_reg l_src = st_src_reg(l); 2219 st_src_reg condition_temp = condition; 2220 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2221 2222 if (native_integers) { 2223 /* This is necessary because TGSI's CMP instruction expects the 2224 * condition to be a float, and we store booleans as integers. 2225 * If TGSI had a UCMP instruction or similar, this extra 2226 * instruction would not be necessary. 
2227 */ 2228 condition_temp = get_temp(glsl_type::vec4_type); 2229 condition.negate = 0; 2230 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); 2231 condition_temp.swizzle = condition.swizzle; 2232 } 2233 2234 if (switch_order) { 2235 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); 2236 } else { 2237 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); 2238 } 2239 2240 l.index++; 2241 r.index++; 2242 } 2243 } else if (ir->rhs->as_expression() && 2244 this->instructions.get_tail() && 2245 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2246 type_size(ir->lhs->type) == 1 && 2247 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2248 /* To avoid emitting an extra MOV when assigning an expression to a 2249 * variable, emit the last instruction of the expression again, but 2250 * replace the destination register with the target of the assignment. 2251 * Dead code elimination will remove the original instruction. 2252 */ 2253 glsl_to_tgsi_instruction *inst, *new_inst; 2254 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2255 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2256 new_inst->saturate = inst->saturate; 2257 inst->dead_mask = inst->dst.writemask; 2258 } else { 2259 for (i = 0; i < type_size(ir->lhs->type); i++) { 2260 emit(ir, TGSI_OPCODE_MOV, l, r); 2261 l.index++; 2262 r.index++; 2263 } 2264 } 2265} 2266 2267 2268void 2269glsl_to_tgsi_visitor::visit(ir_constant *ir) 2270{ 2271 st_src_reg src; 2272 GLfloat stack_vals[4] = { 0 }; 2273 gl_constant_value *values = (gl_constant_value *) stack_vals; 2274 GLenum gl_type = GL_NONE; 2275 unsigned int i; 2276 static int in_array = 0; 2277 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2278 2279 /* Unfortunately, 4 floats is all we can get into 2280 * _mesa_add_typed_unnamed_constant. 
So, make a temp to store an 2281 * aggregate constant and move each constant value into it. If we 2282 * get lucky, copy propagation will eliminate the extra moves. 2283 */ 2284 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2285 st_src_reg temp_base = get_temp(ir->type); 2286 st_dst_reg temp = st_dst_reg(temp_base); 2287 2288 foreach_iter(exec_list_iterator, iter, ir->components) { 2289 ir_constant *field_value = (ir_constant *)iter.get(); 2290 int size = type_size(field_value->type); 2291 2292 assert(size > 0); 2293 2294 field_value->accept(this); 2295 src = this->result; 2296 2297 for (i = 0; i < (unsigned int)size; i++) { 2298 emit(ir, TGSI_OPCODE_MOV, temp, src); 2299 2300 src.index++; 2301 temp.index++; 2302 } 2303 } 2304 this->result = temp_base; 2305 return; 2306 } 2307 2308 if (ir->type->is_array()) { 2309 st_src_reg temp_base = get_temp(ir->type); 2310 st_dst_reg temp = st_dst_reg(temp_base); 2311 int size = type_size(ir->type->fields.array); 2312 2313 assert(size > 0); 2314 in_array++; 2315 2316 for (i = 0; i < ir->type->length; i++) { 2317 ir->array_elements[i]->accept(this); 2318 src = this->result; 2319 for (int j = 0; j < size; j++) { 2320 emit(ir, TGSI_OPCODE_MOV, temp, src); 2321 2322 src.index++; 2323 temp.index++; 2324 } 2325 } 2326 this->result = temp_base; 2327 in_array--; 2328 return; 2329 } 2330 2331 if (ir->type->is_matrix()) { 2332 st_src_reg mat = get_temp(ir->type); 2333 st_dst_reg mat_column = st_dst_reg(mat); 2334 2335 for (i = 0; i < ir->type->matrix_columns; i++) { 2336 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2337 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2338 2339 src = st_src_reg(file, -1, ir->type->base_type); 2340 src.index = add_constant(file, 2341 values, 2342 ir->type->vector_elements, 2343 GL_FLOAT, 2344 &src.swizzle); 2345 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2346 2347 mat_column.index++; 2348 } 2349 2350 this->result = mat; 2351 return; 2352 } 2353 2354 switch 
(ir->type->base_type) { 2355 case GLSL_TYPE_FLOAT: 2356 gl_type = GL_FLOAT; 2357 for (i = 0; i < ir->type->vector_elements; i++) { 2358 values[i].f = ir->value.f[i]; 2359 } 2360 break; 2361 case GLSL_TYPE_UINT: 2362 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2363 for (i = 0; i < ir->type->vector_elements; i++) { 2364 if (native_integers) 2365 values[i].u = ir->value.u[i]; 2366 else 2367 values[i].f = ir->value.u[i]; 2368 } 2369 break; 2370 case GLSL_TYPE_INT: 2371 gl_type = native_integers ? GL_INT : GL_FLOAT; 2372 for (i = 0; i < ir->type->vector_elements; i++) { 2373 if (native_integers) 2374 values[i].i = ir->value.i[i]; 2375 else 2376 values[i].f = ir->value.i[i]; 2377 } 2378 break; 2379 case GLSL_TYPE_BOOL: 2380 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2381 for (i = 0; i < ir->type->vector_elements; i++) { 2382 if (native_integers) 2383 values[i].u = ir->value.b[i] ? ~0 : 0; 2384 else 2385 values[i].f = ir->value.b[i]; 2386 } 2387 break; 2388 default: 2389 assert(!"Non-float/uint/int/bool constant"); 2390 } 2391 2392 this->result = st_src_reg(file, -1, ir->type); 2393 this->result.index = add_constant(file, 2394 values, 2395 ir->type->vector_elements, 2396 gl_type, 2397 &this->result.swizzle); 2398} 2399 2400function_entry * 2401glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2402{ 2403 function_entry *entry; 2404 2405 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2406 entry = (function_entry *)iter.get(); 2407 2408 if (entry->sig == sig) 2409 return entry; 2410 } 2411 2412 entry = ralloc(mem_ctx, function_entry); 2413 entry->sig = sig; 2414 entry->sig_id = this->next_signature_id++; 2415 entry->bgn_inst = NULL; 2416 2417 /* Allocate storage for all the parameters. 
*/ 2418 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2419 ir_variable *param = (ir_variable *)iter.get(); 2420 variable_storage *storage; 2421 2422 storage = find_variable_storage(param); 2423 assert(!storage); 2424 2425 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2426 this->next_temp); 2427 this->variables.push_tail(storage); 2428 2429 this->next_temp += type_size(param->type); 2430 } 2431 2432 if (!sig->return_type->is_void()) { 2433 entry->return_reg = get_temp(sig->return_type); 2434 } else { 2435 entry->return_reg = undef_src; 2436 } 2437 2438 this->function_signatures.push_tail(entry); 2439 return entry; 2440} 2441 2442void 2443glsl_to_tgsi_visitor::visit(ir_call *ir) 2444{ 2445 glsl_to_tgsi_instruction *call_inst; 2446 ir_function_signature *sig = ir->callee; 2447 function_entry *entry = get_function_signature(sig); 2448 int i; 2449 2450 /* Process in parameters. */ 2451 exec_list_iterator sig_iter = sig->parameters.iterator(); 2452 foreach_iter(exec_list_iterator, iter, *ir) { 2453 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2454 ir_variable *param = (ir_variable *)sig_iter.get(); 2455 2456 if (param->mode == ir_var_in || 2457 param->mode == ir_var_inout) { 2458 variable_storage *storage = find_variable_storage(param); 2459 assert(storage); 2460 2461 param_rval->accept(this); 2462 st_src_reg r = this->result; 2463 2464 st_dst_reg l; 2465 l.file = storage->file; 2466 l.index = storage->index; 2467 l.reladdr = NULL; 2468 l.writemask = WRITEMASK_XYZW; 2469 l.cond_mask = COND_TR; 2470 2471 for (i = 0; i < type_size(param->type); i++) { 2472 emit(ir, TGSI_OPCODE_MOV, l, r); 2473 l.index++; 2474 r.index++; 2475 } 2476 } 2477 2478 sig_iter.next(); 2479 } 2480 assert(!sig_iter.has_next()); 2481 2482 /* Emit call instruction */ 2483 call_inst = emit(ir, TGSI_OPCODE_CAL); 2484 call_inst->function = entry; 2485 2486 /* Process out parameters. 
*/ 2487 sig_iter = sig->parameters.iterator(); 2488 foreach_iter(exec_list_iterator, iter, *ir) { 2489 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2490 ir_variable *param = (ir_variable *)sig_iter.get(); 2491 2492 if (param->mode == ir_var_out || 2493 param->mode == ir_var_inout) { 2494 variable_storage *storage = find_variable_storage(param); 2495 assert(storage); 2496 2497 st_src_reg r; 2498 r.file = storage->file; 2499 r.index = storage->index; 2500 r.reladdr = NULL; 2501 r.swizzle = SWIZZLE_NOOP; 2502 r.negate = 0; 2503 2504 param_rval->accept(this); 2505 st_dst_reg l = st_dst_reg(this->result); 2506 2507 for (i = 0; i < type_size(param->type); i++) { 2508 emit(ir, TGSI_OPCODE_MOV, l, r); 2509 l.index++; 2510 r.index++; 2511 } 2512 } 2513 2514 sig_iter.next(); 2515 } 2516 assert(!sig_iter.has_next()); 2517 2518 /* Process return value. */ 2519 this->result = entry->return_reg; 2520} 2521 2522void 2523glsl_to_tgsi_visitor::visit(ir_texture *ir) 2524{ 2525 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; 2526 st_dst_reg result_dst, coord_dst; 2527 glsl_to_tgsi_instruction *inst = NULL; 2528 unsigned opcode = TGSI_OPCODE_NOP; 2529 2530 if (ir->coordinate) { 2531 ir->coordinate->accept(this); 2532 2533 /* Put our coords in a temp. We'll need to modify them for shadow, 2534 * projection, or LOD, so the only case we'd use it as is is if 2535 * we're doing plain old texturing. The optimization passes on 2536 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2537 */ 2538 coord = get_temp(glsl_type::vec4_type); 2539 coord_dst = st_dst_reg(coord); 2540 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2541 } 2542 2543 if (ir->projector) { 2544 ir->projector->accept(this); 2545 projector = this->result; 2546 } 2547 2548 /* Storage for our result. Ideally for an assignment we'd be using 2549 * the actual storage for the result here, instead. 
2550 */ 2551 result_src = get_temp(glsl_type::vec4_type); 2552 result_dst = st_dst_reg(result_src); 2553 2554 switch (ir->op) { 2555 case ir_tex: 2556 opcode = TGSI_OPCODE_TEX; 2557 break; 2558 case ir_txb: 2559 opcode = TGSI_OPCODE_TXB; 2560 ir->lod_info.bias->accept(this); 2561 lod_info = this->result; 2562 break; 2563 case ir_txl: 2564 opcode = TGSI_OPCODE_TXL; 2565 ir->lod_info.lod->accept(this); 2566 lod_info = this->result; 2567 break; 2568 case ir_txd: 2569 opcode = TGSI_OPCODE_TXD; 2570 ir->lod_info.grad.dPdx->accept(this); 2571 dx = this->result; 2572 ir->lod_info.grad.dPdy->accept(this); 2573 dy = this->result; 2574 break; 2575 case ir_txs: 2576 opcode = TGSI_OPCODE_TXQ; 2577 ir->lod_info.lod->accept(this); 2578 lod_info = this->result; 2579 break; 2580 case ir_txf: 2581 opcode = TGSI_OPCODE_TXF; 2582 ir->lod_info.lod->accept(this); 2583 lod_info = this->result; 2584 if (ir->offset) { 2585 ir->offset->accept(this); 2586 offset = this->result; 2587 } 2588 break; 2589 } 2590 2591 const glsl_type *sampler_type = ir->sampler->type; 2592 2593 if (ir->projector) { 2594 if (opcode == TGSI_OPCODE_TEX) { 2595 /* Slot the projector in as the last component of the coord. */ 2596 coord_dst.writemask = WRITEMASK_W; 2597 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2598 coord_dst.writemask = WRITEMASK_XYZW; 2599 opcode = TGSI_OPCODE_TXP; 2600 } else { 2601 st_src_reg coord_w = coord; 2602 coord_w.swizzle = SWIZZLE_WWWW; 2603 2604 /* For the other TEX opcodes there's no projective version 2605 * since the last slot is taken up by LOD info. Do the 2606 * projective divide now. 2607 */ 2608 coord_dst.writemask = WRITEMASK_W; 2609 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2610 2611 /* In the case where we have to project the coordinates "by hand," 2612 * the shadow comparator value must also be projected. 
2613 */ 2614 st_src_reg tmp_src = coord; 2615 if (ir->shadow_comparitor) { 2616 /* Slot the shadow value in as the second to last component of the 2617 * coord. 2618 */ 2619 ir->shadow_comparitor->accept(this); 2620 2621 tmp_src = get_temp(glsl_type::vec4_type); 2622 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2623 2624 /* Projective division not allowed for array samplers. */ 2625 assert(!sampler_type->sampler_array); 2626 2627 tmp_dst.writemask = WRITEMASK_Z; 2628 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2629 2630 tmp_dst.writemask = WRITEMASK_XY; 2631 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2632 } 2633 2634 coord_dst.writemask = WRITEMASK_XYZ; 2635 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2636 2637 coord_dst.writemask = WRITEMASK_XYZW; 2638 coord.swizzle = SWIZZLE_XYZW; 2639 } 2640 } 2641 2642 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2643 * comparator was put in the correct place (and projected) by the code, 2644 * above, that handles by-hand projection. 2645 */ 2646 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2647 /* Slot the shadow value in as the second to last component of the 2648 * coord. 2649 */ 2650 ir->shadow_comparitor->accept(this); 2651 2652 /* XXX This will need to be updated for cubemap array samplers. */ 2653 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2654 sampler_type->sampler_array) || 2655 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 2656 coord_dst.writemask = WRITEMASK_W; 2657 } else { 2658 coord_dst.writemask = WRITEMASK_Z; 2659 } 2660 2661 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2662 coord_dst.writemask = WRITEMASK_XYZW; 2663 } 2664 2665 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2666 opcode == TGSI_OPCODE_TXF) { 2667 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ 2668 coord_dst.writemask = WRITEMASK_W; 2669 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2670 coord_dst.writemask = WRITEMASK_XYZW; 2671 } 2672 2673 if (opcode == TGSI_OPCODE_TXD) 2674 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2675 else if (opcode == TGSI_OPCODE_TXQ) 2676 inst = emit(ir, opcode, result_dst, lod_info); 2677 else if (opcode == TGSI_OPCODE_TXF) { 2678 inst = emit(ir, opcode, result_dst, coord); 2679 } else 2680 inst = emit(ir, opcode, result_dst, coord); 2681 2682 if (ir->shadow_comparitor) 2683 inst->tex_shadow = GL_TRUE; 2684 2685 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2686 this->shader_program, 2687 this->prog); 2688 2689 if (ir->offset) { 2690 inst->tex_offset_num_offset = 1; 2691 inst->tex_offsets[0].Index = offset.index; 2692 inst->tex_offsets[0].File = offset.file; 2693 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); 2694 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); 2695 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); 2696 } 2697 2698 switch (sampler_type->sampler_dimensionality) { 2699 case GLSL_SAMPLER_DIM_1D: 2700 inst->tex_target = (sampler_type->sampler_array) 2701 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2702 break; 2703 case GLSL_SAMPLER_DIM_2D: 2704 inst->tex_target = (sampler_type->sampler_array) 2705 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2706 break; 2707 case GLSL_SAMPLER_DIM_3D: 2708 inst->tex_target = TEXTURE_3D_INDEX; 2709 break; 2710 case GLSL_SAMPLER_DIM_CUBE: 2711 inst->tex_target = TEXTURE_CUBE_INDEX; 2712 break; 2713 case GLSL_SAMPLER_DIM_RECT: 2714 inst->tex_target = TEXTURE_RECT_INDEX; 2715 break; 2716 case GLSL_SAMPLER_DIM_BUF: 2717 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2718 break; 2719 case GLSL_SAMPLER_DIM_EXTERNAL: 2720 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2721 break; 2722 default: 2723 assert(!"Should not get here."); 2724 } 2725 2726 this->result = result_src; 2727} 2728 2729void 2730glsl_to_tgsi_visitor::visit(ir_return *ir) 2731{ 2732 if (ir->get_value()) { 2733 st_dst_reg l; 2734 int i; 2735 2736 assert(current_function); 2737 2738 ir->get_value()->accept(this); 2739 st_src_reg r = this->result; 2740 2741 l = st_dst_reg(current_function->return_reg); 2742 2743 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2744 emit(ir, TGSI_OPCODE_MOV, l, r); 2745 l.index++; 2746 r.index++; 2747 } 2748 } 2749 2750 emit(ir, TGSI_OPCODE_RET); 2751} 2752 2753void 2754glsl_to_tgsi_visitor::visit(ir_discard *ir) 2755{ 2756 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2757 2758 if (ir->condition) { 2759 ir->condition->accept(this); 2760 this->result.negate = ~this->result.negate; 2761 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2762 } else { 2763 emit(ir, TGSI_OPCODE_KILP); 2764 } 2765 2766 fp->UsesKill = GL_TRUE; 2767} 2768 2769void 2770glsl_to_tgsi_visitor::visit(ir_if *ir) 2771{ 2772 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2773 glsl_to_tgsi_instruction *prev_inst; 2774 2775 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2776 2777 ir->condition->accept(this); 2778 assert(this->result.file != PROGRAM_UNDEFINED); 2779 2780 if (this->options->EmitCondCodes) { 2781 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2782 2783 
/* See if we actually generated any instruction for generating 2784 * the condition. If not, then cook up a move to a temp so we 2785 * have something to set cond_update on. 2786 */ 2787 if (cond_inst == prev_inst) { 2788 st_src_reg temp = get_temp(glsl_type::bool_type); 2789 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2790 } 2791 cond_inst->cond_update = GL_TRUE; 2792 2793 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2794 if_inst->dst.cond_mask = COND_NE; 2795 } else { 2796 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2797 } 2798 2799 this->instructions.push_tail(if_inst); 2800 2801 visit_exec_list(&ir->then_instructions, this); 2802 2803 if (!ir->else_instructions.is_empty()) { 2804 emit(ir->condition, TGSI_OPCODE_ELSE); 2805 visit_exec_list(&ir->else_instructions, this); 2806 } 2807 2808 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2809} 2810 2811glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2812{ 2813 result.file = PROGRAM_UNDEFINED; 2814 next_temp = 1; 2815 next_signature_id = 1; 2816 num_immediates = 0; 2817 current_function = NULL; 2818 num_address_regs = 0; 2819 samplers_used = 0; 2820 indirect_addr_temps = false; 2821 indirect_addr_consts = false; 2822 num_clip_distances = 0; 2823 glsl_version = 0; 2824 native_integers = false; 2825 mem_ctx = ralloc_context(NULL); 2826 ctx = NULL; 2827 prog = NULL; 2828 shader_program = NULL; 2829 options = NULL; 2830} 2831 2832glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2833{ 2834 ralloc_free(mem_ctx); 2835} 2836 2837extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2838{ 2839 delete v; 2840} 2841 2842 2843/** 2844 * Count resources used by the given gpu program (number of texture 2845 * samplers, etc). 
2846 */ 2847static void 2848count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2849{ 2850 v->samplers_used = 0; 2851 2852 foreach_iter(exec_list_iterator, iter, v->instructions) { 2853 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2854 2855 if (is_tex_instruction(inst->op)) { 2856 v->samplers_used |= 1 << inst->sampler; 2857 2858 if (inst->tex_shadow) { 2859 prog->ShadowSamplers |= 1 << inst->sampler; 2860 } 2861 } 2862 } 2863 2864 prog->SamplersUsed = v->samplers_used; 2865 2866 if (v->shader_program != NULL) 2867 _mesa_update_shader_textures_used(v->shader_program, prog); 2868} 2869 2870static void 2871set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2872 struct gl_shader_program *shader_program, 2873 const char *name, const glsl_type *type, 2874 ir_constant *val) 2875{ 2876 if (type->is_record()) { 2877 ir_constant *field_constant; 2878 2879 field_constant = (ir_constant *)val->components.get_head(); 2880 2881 for (unsigned int i = 0; i < type->length; i++) { 2882 const glsl_type *field_type = type->fields.structure[i].type; 2883 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2884 type->fields.structure[i].name); 2885 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2886 field_type, field_constant); 2887 field_constant = (ir_constant *)field_constant->next; 2888 } 2889 return; 2890 } 2891 2892 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2893 2894 if (loc == -1) { 2895 fail_link(shader_program, 2896 "Couldn't find uniform for initializer %s\n", name); 2897 return; 2898 } 2899 2900 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2901 ir_constant *element; 2902 const glsl_type *element_type; 2903 if (type->is_array()) { 2904 element = val->array_elements[i]; 2905 element_type = type->fields.array; 2906 } else { 2907 element = val; 2908 element_type = type; 2909 } 2910 2911 void *values; 2912 2913 if (element_type->base_type == GLSL_TYPE_BOOL) { 2914 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2915 for (unsigned int j = 0; j < element_type->components(); j++) { 2916 conv[j] = element->value.b[j]; 2917 } 2918 values = (void *)conv; 2919 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2920 element_type->vector_elements, 2921 1); 2922 } else { 2923 values = &element->value; 2924 } 2925 2926 if (element_type->is_matrix()) { 2927 _mesa_uniform_matrix(ctx, shader_program, 2928 element_type->matrix_columns, 2929 element_type->vector_elements, 2930 loc, 1, GL_FALSE, (GLfloat *)values); 2931 } else { 2932 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2933 values, element_type->gl_type); 2934 } 2935 2936 loc++; 2937 } 2938} 2939 2940/** 2941 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 2942 * are read from the given src in this instruction 2943 */ 2944static int 2945get_src_arg_mask(st_dst_reg dst, st_src_reg src) 2946{ 2947 int read_mask = 0, comp; 2948 2949 /* Now, given the src swizzle and the written channels, find which 2950 * components are actually read 2951 */ 2952 for (comp = 0; comp < 4; ++comp) { 2953 const unsigned coord = GET_SWZ(src.swizzle, comp); 2954 ASSERT(coord < 4); 2955 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 2956 read_mask |= 1 << coord; 2957 } 2958 2959 return read_mask; 2960} 2961 2962/** 2963 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 2964 * instruction is the first instruction to write to register T0. There are 2965 * several lowering passes done in GLSL IR (e.g. 
branches and 2966 * relative addressing) that create a large number of conditional assignments 2967 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 2968 * 2969 * Here is why this conversion is safe: 2970 * CMP T0, T1 T2 T0 can be expanded to: 2971 * if (T1 < 0.0) 2972 * MOV T0, T2; 2973 * else 2974 * MOV T0, T0; 2975 * 2976 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 2977 * as the original program. If (T1 < 0.0) evaluates to false, executing 2978 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 2979 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 2980 * because any instruction that was going to read from T0 after this was going 2981 * to read a garbage value anyway. 2982 */ 2983void 2984glsl_to_tgsi_visitor::simplify_cmp(void) 2985{ 2986 unsigned *tempWrites; 2987 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 2988 2989 tempWrites = new unsigned[MAX_TEMPS]; 2990 if (!tempWrites) { 2991 return; 2992 } 2993 memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); 2994 memset(outputWrites, 0, sizeof(outputWrites)); 2995 2996 foreach_iter(exec_list_iterator, iter, this->instructions) { 2997 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2998 unsigned prevWriteMask = 0; 2999 3000 /* Give up if we encounter relative addressing or flow control. 
*/ 3001 if (inst->dst.reladdr || 3002 tgsi_get_opcode_info(inst->op)->is_branch || 3003 inst->op == TGSI_OPCODE_BGNSUB || 3004 inst->op == TGSI_OPCODE_CONT || 3005 inst->op == TGSI_OPCODE_END || 3006 inst->op == TGSI_OPCODE_ENDSUB || 3007 inst->op == TGSI_OPCODE_RET) { 3008 break; 3009 } 3010 3011 if (inst->dst.file == PROGRAM_OUTPUT) { 3012 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3013 prevWriteMask = outputWrites[inst->dst.index]; 3014 outputWrites[inst->dst.index] |= inst->dst.writemask; 3015 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3016 assert(inst->dst.index < MAX_TEMPS); 3017 prevWriteMask = tempWrites[inst->dst.index]; 3018 tempWrites[inst->dst.index] |= inst->dst.writemask; 3019 } 3020 3021 /* For a CMP to be considered a conditional write, the destination 3022 * register and source register two must be the same. */ 3023 if (inst->op == TGSI_OPCODE_CMP 3024 && !(inst->dst.writemask & prevWriteMask) 3025 && inst->src[2].file == inst->dst.file 3026 && inst->src[2].index == inst->dst.index 3027 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3028 3029 inst->op = TGSI_OPCODE_MOV; 3030 inst->src[0] = inst->src[1]; 3031 } 3032 } 3033 3034 delete [] tempWrites; 3035} 3036 3037/* Replaces all references to a temporary register index with another index. 
*/
void
glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
{
   /* Every instruction is visited; both source operands and the destination
    * are rewritten when they name PROGRAM_TEMPORARY[index].
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
      unsigned j;

      for (j=0; j < num_inst_src_regs(inst->op); j++) {
         if (inst->src[j].file == PROGRAM_TEMPORARY &&
             inst->src[j].index == index) {
            inst->src[j].index = new_index;
         }
      }

      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
         inst->dst.index = new_index;
      }
   }
}

/* Returns the position (0-based count into the instruction list) of the first
 * instruction that reads temporary register `index`.  If that read happens
 * inside a loop, the position of the outermost enclosing BGNLOOP is returned
 * instead.  Returns -1 if the register is never read.
 */
int
glsl_to_tgsi_visitor::get_first_temp_read(int index)
{
   int depth = 0; /* loop depth */
   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
   unsigned i = 0, j;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      for (j=0; j < num_inst_src_regs(inst->op); j++) {
         if (inst->src[j].file == PROGRAM_TEMPORARY &&
             inst->src[j].index == index) {
            return (depth == 0) ? i : loop_start;
         }
      }

      /* Track loop nesting; loop_start is only updated when entering or
       * leaving the outermost loop.
       */
      if (inst->op == TGSI_OPCODE_BGNLOOP) {
         if(depth++ == 0)
            loop_start = i;
      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
         if (--depth == 0)
            loop_start = -1;
      }
      assert(depth >= 0);

      i++;
   }

   return -1;
}

/* Returns the position of the first instruction whose destination is
 * temporary register `index`.  If that write happens inside a loop, the
 * position of the outermost enclosing BGNLOOP is returned instead.
 * Returns -1 if the register is never written.
 */
int
glsl_to_tgsi_visitor::get_first_temp_write(int index)
{
   int depth = 0; /* loop depth */
   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
   int i = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
         return (depth == 0) ? i : loop_start;
      }

      if (inst->op == TGSI_OPCODE_BGNLOOP) {
         if(depth++ == 0)
            loop_start = i;
      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
         if (--depth == 0)
            loop_start = -1;
      }
      assert(depth >= 0);

      i++;
   }

   return -1;
}

/* Returns the position of the last instruction that reads temporary register
 * `index`.  If the last read happens inside a loop, the position of the
 * ENDLOOP that closes the outermost enclosing loop is returned instead.
 * Returns -1 if the register is never read.
 */
int
glsl_to_tgsi_visitor::get_last_temp_read(int index)
{
   int depth = 0; /* loop depth */
   int last = -1; /* index of last instruction that reads the temporary */
   unsigned i = 0, j;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      for (j=0; j < num_inst_src_regs(inst->op); j++) {
         if (inst->src[j].file == PROGRAM_TEMPORARY &&
             inst->src[j].index == index) {
            /* -2 is a placeholder meaning "read inside a loop"; it is
             * replaced by the ENDLOOP position below.
             */
            last = (depth == 0) ? i : -2;
         }
      }

      if (inst->op == TGSI_OPCODE_BGNLOOP)
         depth++;
      else if (inst->op == TGSI_OPCODE_ENDLOOP)
         if (--depth == 0 && last == -2)
            last = i;
      assert(depth >= 0);

      i++;
   }

   assert(last >= -1);
   return last;
}

/* Returns the position of the last instruction whose destination is temporary
 * register `index`.  If the last write happens inside a loop, the position of
 * the ENDLOOP that closes the outermost enclosing loop is returned instead.
 * Returns -1 if the register is never written.
 */
int
glsl_to_tgsi_visitor::get_last_temp_write(int index)
{
   int depth = 0; /* loop depth */
   int last = -1; /* index of last instruction that writes to the temporary */
   int i = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
         last = (depth == 0) ? i : -2; /* -2: written inside a loop */

      if (inst->op == TGSI_OPCODE_BGNLOOP)
         depth++;
      else if (inst->op == TGSI_OPCODE_ENDLOOP)
         if (--depth == 0 && last == -2)
            last = i;
      assert(depth >= 0);

      i++;
   }

   assert(last >= -1);
   return last;
}

/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 *
 * Bookkeeping used below:
 *  - acp[4 * reg + chan] is the MOV instruction currently known to have
 *    copied a value into that temporary channel, or NULL if none is
 *    available ("ACP" = the table of available copies).
 *  - acp_level[4 * reg + chan] is the IF nesting level at which that entry
 *    was recorded; `level` is the current IF nesting level.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         /* Only meaningful (and only used) when src[r] is a temporary. */
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose the swizzles: for each channel read through the
             * temporary, look up which channel of the original register
             * the recorded MOV copied from.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src. */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP. */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}

/*
 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production after copy propagation but
 * before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * and after this pass:
 *
 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
 */
void
glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
   int i;

   /* For each temporary, delete every instruction that writes it at a
    * position after the register's last read (as reported by
    * get_last_temp_read(), which is -1 when the register is never read).
    */
   for (i=0; i < this->next_temp; i++) {
      int last_read = get_last_temp_read(i);
      int j = 0; /* position of the current instruction in the list */

      foreach_iter(exec_list_iterator, iter, this->instructions) {
         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
             j > last_read)
         {
            iter.remove();
            delete inst;
         }

         j++;
      }
   }
}

/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
 * code elimination.  This is less primitive than eliminate_dead_code(), as it
 * is per-channel and can detect consecutive writes without a read between them
 * as dead code.
However, there is some dead code that can be eliminated by
 * eliminate_dead_code() but not this function - for example, this function
 * cannot eliminate an instruction writing to a register that is never read and
 * is the only instruction writing to that register.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.
 *
 * Returns the number of instructions that were removed outright.  Instructions
 * that are only partially dead are kept and have the dead channels stripped
 * from their writemask.
 *
 * Bookkeeping used below:
 *  - writes[4 * reg + chan] is the most recent instruction that wrote that
 *    temporary channel and has not been read since, or NULL.
 *  - write_level[4 * reg + chan] is the IF nesting level recorded for that
 *    entry; `level` is the current IF nesting level.
 */
int
glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
{
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                     glsl_to_tgsi_instruction *,
                                                     this->next_temp * 4);
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;
   int removed = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
      case TGSI_OPCODE_CONT:
      case TGSI_OPCODE_BRK:
         /* End of a basic block, clear the write array entirely.
          *
          * This keeps us from killing dead code when the writes are
          * on either side of a loop, even when the register isn't touched
          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
          * dead code of this type, so it shouldn't make a difference as long as
          * the dead code elimination pass in the GLSL compiler does its job.
          */
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Promote the recorded level of all channels written inside the
          * preceding if or else block to the level above the if/else block.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!writes[4 * r + c])
                  continue;

               if (write_level[4 * r + c] == level)
                  write_level[4 * r + c] = level-1;
            }
         }

         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
         if(inst->op == TGSI_OPCODE_ENDIF)
            --level;

         break;

      case TGSI_OPCODE_IF:
         ++level;
         /* fallthrough to default case to mark the condition as read */

      default:
         /* Continuing the block, clear any channels from the write array that
          * are read by this instruction.
          */
         for (unsigned i = 0; i < Elements(inst->src); i++) {
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               /* Build a bitmask of the channels selected by the swizzle. */
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c)) {
                     writes[4 * inst->src[i].index + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this instruction writes to a temporary, add it to the write array.
       * If there is already an instruction in the write array for one or more
       * of the channels, flag that channel write as dead.
       */
      if (inst->dst.file == PROGRAM_TEMPORARY &&
          !inst->dst.reladdr &&
          !inst->saturate) {
         for (int c = 0; c < 4; c++) {
            if (inst->dst.writemask & (1 << c)) {
               if (writes[4 * inst->dst.index + c]) {
                  /* An earlier write recorded at a shallower IF level is
                   * left in place (neither flagged dead nor replaced);
                   * otherwise the earlier write to this channel is dead.
                   */
                  if (write_level[4 * inst->dst.index + c] < level)
                     continue;
                  else
                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
               }
               writes[4 * inst->dst.index + c] = inst;
               write_level[4 * inst->dst.index + c] = level;
            }
         }
      }
   }

   /* Anything still in the write array at this point is dead code. */
   for (int r = 0; r < this->next_temp; r++) {
      for (int c = 0; c < 4; c++) {
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
         if (inst)
            inst->dead_mask |= (1 << c);
      }
   }

   /* Now actually remove the instructions that are completely dead and update
    * the writemask of other instructions with dead channels.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      if (!inst->dead_mask || !inst->dst.writemask)
         continue;
      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
         iter.remove();
         delete inst;
         removed++;
      } else
         inst->dst.writemask &= ~(inst->dead_mask);
   }

   ralloc_free(write_level);
   ralloc_free(writes);

   return removed;
}

/* Merges temporary registers together where possible to reduce the number of
 * registers needed to run a program.
 *
 * Produces optimal code only after copy propagation and dead code elimination
 * have been run.
*/
void
glsl_to_tgsi_visitor::merge_registers(void)
{
   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
   int i, j;

   /* Read the indices of the last read and first write to each temp register
    * into an array so that we don't have to traverse the instruction list as
    * much. */
   for (i=0; i < this->next_temp; i++) {
      last_reads[i] = get_last_temp_read(i);
      first_writes[i] = get_first_temp_write(i);
   }

   /* Start looking for registers with non-overlapping usages that can be
    * merged together. */
   for (i=0; i < this->next_temp; i++) {
      /* Don't touch unused registers. */
      if (last_reads[i] < 0 || first_writes[i] < 0) continue;

      for (j=0; j < this->next_temp; j++) {
         /* Never merge a register with itself.  Without this guard a
          * register whose last read is not after its first write (e.g. one
          * that is read before being initialized) satisfies the test below
          * with j == i, gets marked unused (-1/-1), and every later live
          * register then passes the test against those -1 values and is
          * merged into it regardless of overlapping lifetimes. */
         if (i == j) continue;

         /* Don't touch unused registers. */
         if (last_reads[j] < 0 || first_writes[j] < 0) continue;

         /* We can merge the two registers if the first write to j is after or
          * in the same instruction as the last read from i.  Note that the
          * register at index i will always be used earlier or at the same time
          * as the register at index j. */
         if (first_writes[i] <= first_writes[j] &&
             last_reads[i] <= first_writes[j])
         {
            rename_temp_register(j, i); /* Replace all references to j with i.*/

            /* Update the first_writes and last_reads arrays with the new
             * values for the merged register index, and mark the newly unused
             * register index as such. */
            last_reads[i] = last_reads[j];
            first_writes[j] = -1;
            last_reads[j] = -1;
         }
      }
   }

   ralloc_free(last_reads);
   ralloc_free(first_writes);
}

/* Reassign indices to temporary registers by reusing unused indices created
 * by optimization passes.
*/ 3614void 3615glsl_to_tgsi_visitor::renumber_registers(void) 3616{ 3617 int i = 0; 3618 int new_index = 0; 3619 3620 for (i=0; i < this->next_temp; i++) { 3621 if (get_first_temp_read(i) < 0) continue; 3622 if (i != new_index) 3623 rename_temp_register(i, new_index); 3624 new_index++; 3625 } 3626 3627 this->next_temp = new_index; 3628} 3629 3630/** 3631 * Returns a fragment program which implements the current pixel transfer ops. 3632 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3633 */ 3634extern "C" void 3635get_pixel_transfer_visitor(struct st_fragment_program *fp, 3636 glsl_to_tgsi_visitor *original, 3637 int scale_and_bias, int pixel_maps) 3638{ 3639 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3640 struct st_context *st = st_context(original->ctx); 3641 struct gl_program *prog = &fp->Base.Base; 3642 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3643 st_src_reg coord, src0; 3644 st_dst_reg dst0; 3645 glsl_to_tgsi_instruction *inst; 3646 3647 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3648 v->ctx = original->ctx; 3649 v->prog = prog; 3650 v->shader_program = NULL; 3651 v->glsl_version = original->glsl_version; 3652 v->native_integers = original->native_integers; 3653 v->options = original->options; 3654 v->next_temp = original->next_temp; 3655 v->num_address_regs = original->num_address_regs; 3656 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3657 v->indirect_addr_temps = original->indirect_addr_temps; 3658 v->indirect_addr_consts = original->indirect_addr_consts; 3659 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3660 v->num_immediates = original->num_immediates; 3661 3662 /* 3663 * Get initial pixel color from the texture. 
3664 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3665 */ 3666 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3667 src0 = v->get_temp(glsl_type::vec4_type); 3668 dst0 = st_dst_reg(src0); 3669 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3670 inst->sampler = 0; 3671 inst->tex_target = TEXTURE_2D_INDEX; 3672 3673 prog->InputsRead |= FRAG_BIT_TEX0; 3674 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3675 v->samplers_used |= (1 << 0); 3676 3677 if (scale_and_bias) { 3678 static const gl_state_index scale_state[STATE_LENGTH] = 3679 { STATE_INTERNAL, STATE_PT_SCALE, 3680 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3681 static const gl_state_index bias_state[STATE_LENGTH] = 3682 { STATE_INTERNAL, STATE_PT_BIAS, 3683 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3684 GLint scale_p, bias_p; 3685 st_src_reg scale, bias; 3686 3687 scale_p = _mesa_add_state_reference(params, scale_state); 3688 bias_p = _mesa_add_state_reference(params, bias_state); 3689 3690 /* MAD colorTemp, colorTemp, scale, bias; */ 3691 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3692 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3693 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3694 } 3695 3696 if (pixel_maps) { 3697 st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3698 st_dst_reg temp_dst = st_dst_reg(temp); 3699 3700 assert(st->pixel_xfer.pixelmap_texture); 3701 3702 /* With a little effort, we can do four pixel map look-ups with 3703 * two TEX instructions: 3704 */ 3705 3706 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3707 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3708 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3709 inst->sampler = 1; 3710 inst->tex_target = TEXTURE_2D_INDEX; 3711 3712 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3713 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 3714 
temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3715 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3716 inst->sampler = 1; 3717 inst->tex_target = TEXTURE_2D_INDEX; 3718 3719 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3720 v->samplers_used |= (1 << 1); 3721 3722 /* MOV colorTemp, temp; */ 3723 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3724 } 3725 3726 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3727 * new visitor. */ 3728 foreach_iter(exec_list_iterator, iter, original->instructions) { 3729 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3730 glsl_to_tgsi_instruction *newinst; 3731 st_src_reg src_regs[3]; 3732 3733 if (inst->dst.file == PROGRAM_OUTPUT) 3734 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3735 3736 for (int i=0; i<3; i++) { 3737 src_regs[i] = inst->src[i]; 3738 if (src_regs[i].file == PROGRAM_INPUT && 3739 src_regs[i].index == FRAG_ATTRIB_COL0) 3740 { 3741 src_regs[i].file = PROGRAM_TEMPORARY; 3742 src_regs[i].index = src0.index; 3743 } 3744 else if (src_regs[i].file == PROGRAM_INPUT) 3745 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3746 } 3747 3748 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3749 newinst->tex_target = inst->tex_target; 3750 } 3751 3752 /* Make modifications to fragment program info. */ 3753 prog->Parameters = _mesa_combine_parameter_lists(params, 3754 original->prog->Parameters); 3755 _mesa_free_parameter_list(params); 3756 count_resources(v, prog); 3757 fp->glsl_to_tgsi = v; 3758} 3759 3760/** 3761 * Make fragment program for glBitmap: 3762 * Sample the texture and kill the fragment if the bit is 0. 3763 * This program will be combined with the user's fragment program. 3764 * 3765 * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 
3766 */ 3767extern "C" void 3768get_bitmap_visitor(struct st_fragment_program *fp, 3769 glsl_to_tgsi_visitor *original, int samplerIndex) 3770{ 3771 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3772 struct st_context *st = st_context(original->ctx); 3773 struct gl_program *prog = &fp->Base.Base; 3774 st_src_reg coord, src0; 3775 st_dst_reg dst0; 3776 glsl_to_tgsi_instruction *inst; 3777 3778 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3779 v->ctx = original->ctx; 3780 v->prog = prog; 3781 v->shader_program = NULL; 3782 v->glsl_version = original->glsl_version; 3783 v->native_integers = original->native_integers; 3784 v->options = original->options; 3785 v->next_temp = original->next_temp; 3786 v->num_address_regs = original->num_address_regs; 3787 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3788 v->indirect_addr_temps = original->indirect_addr_temps; 3789 v->indirect_addr_consts = original->indirect_addr_consts; 3790 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3791 v->num_immediates = original->num_immediates; 3792 3793 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ 3794 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3795 src0 = v->get_temp(glsl_type::vec4_type); 3796 dst0 = st_dst_reg(src0); 3797 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3798 inst->sampler = samplerIndex; 3799 inst->tex_target = TEXTURE_2D_INDEX; 3800 3801 prog->InputsRead |= FRAG_BIT_TEX0; 3802 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ 3803 v->samplers_used |= (1 << samplerIndex); 3804 3805 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ 3806 src0.negate = NEGATE_XYZW; 3807 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) 3808 src0.swizzle = SWIZZLE_XXXX; 3809 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); 3810 3811 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3812 * new visitor. 
*/ 3813 foreach_iter(exec_list_iterator, iter, original->instructions) { 3814 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3815 glsl_to_tgsi_instruction *newinst; 3816 st_src_reg src_regs[3]; 3817 3818 if (inst->dst.file == PROGRAM_OUTPUT) 3819 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3820 3821 for (int i=0; i<3; i++) { 3822 src_regs[i] = inst->src[i]; 3823 if (src_regs[i].file == PROGRAM_INPUT) 3824 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3825 } 3826 3827 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3828 newinst->tex_target = inst->tex_target; 3829 } 3830 3831 /* Make modifications to fragment program info. */ 3832 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); 3833 count_resources(v, prog); 3834 fp->glsl_to_tgsi = v; 3835} 3836 3837/* ------------------------- TGSI conversion stuff -------------------------- */ 3838struct label { 3839 unsigned branch_target; 3840 unsigned token; 3841}; 3842 3843/** 3844 * Intermediate state used during shader translation. 3845 */ 3846struct st_translate { 3847 struct ureg_program *ureg; 3848 3849 struct ureg_dst temps[MAX_TEMPS]; 3850 struct ureg_src *constants; 3851 struct ureg_src *immediates; 3852 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 3853 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 3854 struct ureg_dst address[1]; 3855 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 3856 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 3857 3858 const GLuint *inputMapping; 3859 const GLuint *outputMapping; 3860 3861 /* For every instruction that contains a label (eg CALL), keep 3862 * details so that we can go back afterwards and emit the correct 3863 * tgsi instruction number for each label. 
3864 */ 3865 struct label *labels; 3866 unsigned labels_size; 3867 unsigned labels_count; 3868 3869 /* Keep a record of the tgsi instruction number that each mesa 3870 * instruction starts at, will be used to fix up labels after 3871 * translation. 3872 */ 3873 unsigned *insn; 3874 unsigned insn_size; 3875 unsigned insn_count; 3876 3877 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 3878 3879 boolean error; 3880}; 3881 3882/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 3883static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 3884 TGSI_SEMANTIC_FACE, 3885 TGSI_SEMANTIC_VERTEXID, 3886 TGSI_SEMANTIC_INSTANCEID 3887}; 3888 3889/** 3890 * Make note of a branch to a label in the TGSI code. 3891 * After we've emitted all instructions, we'll go over the list 3892 * of labels built here and patch the TGSI code with the actual 3893 * location of each label. 3894 */ 3895static unsigned *get_label(struct st_translate *t, unsigned branch_target) 3896{ 3897 unsigned i; 3898 3899 if (t->labels_count + 1 >= t->labels_size) { 3900 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3901 t->labels = (struct label *)realloc(t->labels, 3902 t->labels_size * sizeof(struct label)); 3903 if (t->labels == NULL) { 3904 static unsigned dummy; 3905 t->error = TRUE; 3906 return &dummy; 3907 } 3908 } 3909 3910 i = t->labels_count++; 3911 t->labels[i].branch_target = branch_target; 3912 return &t->labels[i].token; 3913} 3914 3915/** 3916 * Called prior to emitting the TGSI code for each instruction. 3917 * Allocate additional space for instructions if needed. 3918 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 3919 * the next TGSI instruction. 
3920 */ 3921static void set_insn_start(struct st_translate *t, unsigned start) 3922{ 3923 if (t->insn_count + 1 >= t->insn_size) { 3924 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3925 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 3926 if (t->insn == NULL) { 3927 t->error = TRUE; 3928 return; 3929 } 3930 } 3931 3932 t->insn[t->insn_count++] = start; 3933} 3934 3935/** 3936 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 3937 */ 3938static struct ureg_src 3939emit_immediate(struct st_translate *t, 3940 gl_constant_value values[4], 3941 int type, int size) 3942{ 3943 struct ureg_program *ureg = t->ureg; 3944 3945 switch(type) 3946 { 3947 case GL_FLOAT: 3948 return ureg_DECL_immediate(ureg, &values[0].f, size); 3949 case GL_INT: 3950 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 3951 case GL_UNSIGNED_INT: 3952 case GL_BOOL: 3953 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 3954 default: 3955 assert(!"should not get here - type must be float, int, uint, or bool"); 3956 return ureg_src_undef(); 3957 } 3958} 3959 3960/** 3961 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 
3962 */ 3963static struct ureg_dst 3964dst_register(struct st_translate *t, 3965 gl_register_file file, 3966 GLuint index) 3967{ 3968 switch(file) { 3969 case PROGRAM_UNDEFINED: 3970 return ureg_dst_undef(); 3971 3972 case PROGRAM_TEMPORARY: 3973 if (ureg_dst_is_undef(t->temps[index])) 3974 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 3975 3976 return t->temps[index]; 3977 3978 case PROGRAM_OUTPUT: 3979 if (t->procType == TGSI_PROCESSOR_VERTEX) 3980 assert(index < VERT_RESULT_MAX); 3981 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 3982 assert(index < FRAG_RESULT_MAX); 3983 else 3984 assert(index < GEOM_RESULT_MAX); 3985 3986 assert(t->outputMapping[index] < Elements(t->outputs)); 3987 3988 return t->outputs[t->outputMapping[index]]; 3989 3990 case PROGRAM_ADDRESS: 3991 return t->address[index]; 3992 3993 default: 3994 assert(!"unknown dst register file"); 3995 return ureg_dst_undef(); 3996 } 3997} 3998 3999/** 4000 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
4001 */ 4002static struct ureg_src 4003src_register(struct st_translate *t, 4004 gl_register_file file, 4005 GLuint index) 4006{ 4007 switch(file) { 4008 case PROGRAM_UNDEFINED: 4009 return ureg_src_undef(); 4010 4011 case PROGRAM_TEMPORARY: 4012 assert(index >= 0); 4013 assert(index < Elements(t->temps)); 4014 if (ureg_dst_is_undef(t->temps[index])) 4015 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 4016 return ureg_src(t->temps[index]); 4017 4018 case PROGRAM_NAMED_PARAM: 4019 case PROGRAM_ENV_PARAM: 4020 case PROGRAM_LOCAL_PARAM: 4021 case PROGRAM_UNIFORM: 4022 assert(index >= 0); 4023 return t->constants[index]; 4024 case PROGRAM_STATE_VAR: 4025 case PROGRAM_CONSTANT: /* ie, immediate */ 4026 if (index < 0) 4027 return ureg_DECL_constant(t->ureg, 0); 4028 else 4029 return t->constants[index]; 4030 4031 case PROGRAM_IMMEDIATE: 4032 return t->immediates[index]; 4033 4034 case PROGRAM_INPUT: 4035 assert(t->inputMapping[index] < Elements(t->inputs)); 4036 return t->inputs[t->inputMapping[index]]; 4037 4038 case PROGRAM_OUTPUT: 4039 assert(t->outputMapping[index] < Elements(t->outputs)); 4040 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 4041 4042 case PROGRAM_ADDRESS: 4043 return ureg_src(t->address[index]); 4044 4045 case PROGRAM_SYSTEM_VALUE: 4046 assert(index < Elements(t->systemValues)); 4047 return t->systemValues[index]; 4048 4049 default: 4050 assert(!"unknown src register file"); 4051 return ureg_src_undef(); 4052 } 4053} 4054 4055/** 4056 * Create a TGSI ureg_dst register from an st_dst_reg. 
4057 */ 4058static struct ureg_dst 4059translate_dst(struct st_translate *t, 4060 const st_dst_reg *dst_reg, 4061 bool saturate, bool clamp_color) 4062{ 4063 struct ureg_dst dst = dst_register(t, 4064 dst_reg->file, 4065 dst_reg->index); 4066 4067 dst = ureg_writemask(dst, dst_reg->writemask); 4068 4069 if (saturate) 4070 dst = ureg_saturate(dst); 4071 else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) { 4072 /* Clamp colors for ARB_color_buffer_float. */ 4073 switch (t->procType) { 4074 case TGSI_PROCESSOR_VERTEX: 4075 /* XXX if the geometry shader is present, this must be done there 4076 * instead of here. */ 4077 if (dst_reg->index == VERT_RESULT_COL0 || 4078 dst_reg->index == VERT_RESULT_COL1 || 4079 dst_reg->index == VERT_RESULT_BFC0 || 4080 dst_reg->index == VERT_RESULT_BFC1) { 4081 dst = ureg_saturate(dst); 4082 } 4083 break; 4084 4085 case TGSI_PROCESSOR_FRAGMENT: 4086 if (dst_reg->index >= FRAG_RESULT_COLOR) { 4087 dst = ureg_saturate(dst); 4088 } 4089 break; 4090 } 4091 } 4092 4093 if (dst_reg->reladdr != NULL) 4094 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4095 4096 return dst; 4097} 4098 4099/** 4100 * Create a TGSI ureg_src register from an st_src_reg. 4101 */ 4102static struct ureg_src 4103translate_src(struct st_translate *t, const st_src_reg *src_reg) 4104{ 4105 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4106 4107 src = ureg_swizzle(src, 4108 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4109 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4110 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4111 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4112 4113 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4114 src = ureg_negate(src); 4115 4116 if (src_reg->reladdr != NULL) { 4117 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4118 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4119 * set the bit for src.Negate. So we have to do the operation manually 4120 * here to work around the compiler's problems. 
*/ 4121 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4122 struct ureg_src addr = ureg_src(t->address[0]); 4123 src.Indirect = 1; 4124 src.IndirectFile = addr.File; 4125 src.IndirectIndex = addr.Index; 4126 src.IndirectSwizzle = addr.SwizzleX; 4127 4128 if (src_reg->file != PROGRAM_INPUT && 4129 src_reg->file != PROGRAM_OUTPUT) { 4130 /* If src_reg->index was negative, it was set to zero in 4131 * src_register(). Reassign it now. But don't do this 4132 * for input/output regs since they get remapped while 4133 * const buffers don't. 4134 */ 4135 src.Index = src_reg->index; 4136 } 4137 } 4138 4139 return src; 4140} 4141 4142static struct tgsi_texture_offset 4143translate_tex_offset(struct st_translate *t, 4144 const struct tgsi_texture_offset *in_offset) 4145{ 4146 struct tgsi_texture_offset offset; 4147 4148 assert(in_offset->File == PROGRAM_IMMEDIATE); 4149 4150 offset.File = TGSI_FILE_IMMEDIATE; 4151 offset.Index = in_offset->Index; 4152 offset.SwizzleX = in_offset->SwizzleX; 4153 offset.SwizzleY = in_offset->SwizzleY; 4154 offset.SwizzleZ = in_offset->SwizzleZ; 4155 4156 return offset; 4157} 4158 4159static void 4160compile_tgsi_instruction(struct st_translate *t, 4161 const glsl_to_tgsi_instruction *inst, 4162 bool clamp_dst_color_output) 4163{ 4164 struct ureg_program *ureg = t->ureg; 4165 GLuint i; 4166 struct ureg_dst dst[1]; 4167 struct ureg_src src[4]; 4168 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4169 4170 unsigned num_dst; 4171 unsigned num_src; 4172 4173 num_dst = num_inst_dst_regs(inst->op); 4174 num_src = num_inst_src_regs(inst->op); 4175 4176 if (num_dst) 4177 dst[0] = translate_dst(t, 4178 &inst->dst, 4179 inst->saturate, 4180 clamp_dst_color_output); 4181 4182 for (i = 0; i < num_src; i++) 4183 src[i] = translate_src(t, &inst->src[i]); 4184 4185 switch(inst->op) { 4186 case TGSI_OPCODE_BGNLOOP: 4187 case TGSI_OPCODE_CAL: 4188 case TGSI_OPCODE_ELSE: 4189 case TGSI_OPCODE_ENDLOOP: 4190 case TGSI_OPCODE_IF: 4191 
assert(num_dst == 0); 4192 ureg_label_insn(ureg, 4193 inst->op, 4194 src, num_src, 4195 get_label(t, 4196 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); 4197 return; 4198 4199 case TGSI_OPCODE_TEX: 4200 case TGSI_OPCODE_TXB: 4201 case TGSI_OPCODE_TXD: 4202 case TGSI_OPCODE_TXL: 4203 case TGSI_OPCODE_TXP: 4204 case TGSI_OPCODE_TXQ: 4205 case TGSI_OPCODE_TXF: 4206 src[num_src++] = t->samplers[inst->sampler]; 4207 for (i = 0; i < inst->tex_offset_num_offset; i++) { 4208 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 4209 } 4210 ureg_tex_insn(ureg, 4211 inst->op, 4212 dst, num_dst, 4213 st_translate_texture_target(inst->tex_target, inst->tex_shadow), 4214 texoffsets, inst->tex_offset_num_offset, 4215 src, num_src); 4216 return; 4217 4218 case TGSI_OPCODE_SCS: 4219 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4220 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4221 break; 4222 4223 default: 4224 ureg_insn(ureg, 4225 inst->op, 4226 dst, num_dst, 4227 src, num_src); 4228 break; 4229 } 4230} 4231 4232/** 4233 * Emit the TGSI instructions for inverting and adjusting WPOS. 4234 * This code is unavoidable because it also depends on whether 4235 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4236 */ 4237static void 4238emit_wpos_adjustment( struct st_translate *t, 4239 const struct gl_program *program, 4240 boolean invert, 4241 GLfloat adjX, GLfloat adjY[2]) 4242{ 4243 struct ureg_program *ureg = t->ureg; 4244 4245 /* Fragment program uses fragment position input. 4246 * Need to replace instances of INPUT[WPOS] with temp T 4247 * where T = INPUT[WPOS] by y is inverted. 4248 */ 4249 static const gl_state_index wposTransformState[STATE_LENGTH] 4250 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4251 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4252 4253 /* XXX: note we are modifying the incoming shader here! 
Need to 4254 * do this before emitting the constant decls below, or this 4255 * will be missed: 4256 */ 4257 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 4258 wposTransformState); 4259 4260 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 4261 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); 4262 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4263 4264 /* First, apply the coordinate shift: */ 4265 if (adjX || adjY[0] || adjY[1]) { 4266 if (adjY[0] != adjY[1]) { 4267 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively 4268 * depending on whether inversion is actually going to be applied 4269 * or not, which is determined by testing against the inversion 4270 * state variable used below, which will be either +1 or -1. 4271 */ 4272 struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg); 4273 4274 ureg_CMP(ureg, adj_temp, 4275 ureg_scalar(wpostrans, invert ? 2 : 0), 4276 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), 4277 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); 4278 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); 4279 } else { 4280 ureg_ADD(ureg, wpos_temp, wpos_input, 4281 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); 4282 } 4283 wpos_input = ureg_src(wpos_temp); 4284 } else { 4285 /* MOV wpos_temp, input[wpos] 4286 */ 4287 ureg_MOV( ureg, wpos_temp, wpos_input ); 4288 } 4289 4290 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be 4291 * inversion/identity, or the other way around if we're drawing to an FBO. 
4292 */ 4293 if (invert) { 4294 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 4295 */ 4296 ureg_MAD( ureg, 4297 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 4298 wpos_input, 4299 ureg_scalar(wpostrans, 0), 4300 ureg_scalar(wpostrans, 1)); 4301 } else { 4302 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 4303 */ 4304 ureg_MAD( ureg, 4305 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 4306 wpos_input, 4307 ureg_scalar(wpostrans, 2), 4308 ureg_scalar(wpostrans, 3)); 4309 } 4310 4311 /* Use wpos_temp as position input from here on: 4312 */ 4313 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4314} 4315 4316 4317/** 4318 * Emit fragment position/ooordinate code. 4319 */ 4320static void 4321emit_wpos(struct st_context *st, 4322 struct st_translate *t, 4323 const struct gl_program *program, 4324 struct ureg_program *ureg) 4325{ 4326 const struct gl_fragment_program *fp = 4327 (const struct gl_fragment_program *) program; 4328 struct pipe_screen *pscreen = st->pipe->screen; 4329 GLfloat adjX = 0.0f; 4330 GLfloat adjY[2] = { 0.0f, 0.0f }; 4331 boolean invert = FALSE; 4332 4333 /* Query the pixel center conventions supported by the pipe driver and set 4334 * adjX, adjY to help out if it cannot handle the requested one internally. 4335 * 4336 * The bias of the y-coordinate depends on whether y-inversion takes place 4337 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 4338 * drawing to an FBO (causes additional inversion), and whether the the pipe 4339 * driver origin and the requested origin differ (the latter condition is 4340 * stored in the 'invert' variable). 
4341 * 4342 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 4343 * 4344 * center shift only: 4345 * i -> h: +0.5 4346 * h -> i: -0.5 4347 * 4348 * inversion only: 4349 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 4350 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 4351 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 4352 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 4353 * 4354 * inversion and center shift: 4355 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 4356 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 4357 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 4358 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 4359 */ 4360 if (fp->OriginUpperLeft) { 4361 /* Fragment shader wants origin in upper-left */ 4362 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4363 /* the driver supports upper-left origin */ 4364 } 4365 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4366 /* the driver supports lower-left origin, need to invert Y */ 4367 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4368 invert = TRUE; 4369 } 4370 else 4371 assert(0); 4372 } 4373 else { 4374 /* Fragment shader wants origin in lower-left */ 4375 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4376 /* the driver supports lower-left origin */ 4377 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4378 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4379 /* the driver supports upper-left origin, need to invert Y */ 4380 invert = TRUE; 4381 else 4382 assert(0); 4383 } 4384 4385 if (fp->PixelCenterInteger) { 4386 /* Fragment shader wants pixel center integer */ 4387 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4388 /* the driver supports pixel center integer */ 4389 adjY[1] = 1.0f; 4390 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4391 } 4392 else if (pscreen->get_param(pscreen, 
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4393 /* the driver supports pixel center half integer, need to bias X,Y */ 4394 adjX = -0.5f; 4395 adjY[0] = -0.5f; 4396 adjY[1] = 0.5f; 4397 } 4398 else 4399 assert(0); 4400 } 4401 else { 4402 /* Fragment shader wants pixel center half integer */ 4403 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4404 /* the driver supports pixel center half integer */ 4405 } 4406 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4407 /* the driver supports pixel center integer, need to bias X,Y */ 4408 adjX = adjY[0] = adjY[1] = 0.5f; 4409 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4410 } 4411 else 4412 assert(0); 4413 } 4414 4415 /* we invert after adjustment so that we avoid the MOV to temporary, 4416 * and reuse the adjustment ADD instead */ 4417 emit_wpos_adjustment(t, program, invert, adjX, adjY); 4418} 4419 4420/** 4421 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4422 * TGSI uses +1 for front, -1 for back. 4423 * This function converts the TGSI value to the GL value. Simply clamping/ 4424 * saturating the value to [0,1] does the job. 
4425 */ 4426static void 4427emit_face_var(struct st_translate *t) 4428{ 4429 struct ureg_program *ureg = t->ureg; 4430 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4431 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4432 4433 /* MOV_SAT face_temp, input[face] */ 4434 face_temp = ureg_saturate(face_temp); 4435 ureg_MOV(ureg, face_temp, face_input); 4436 4437 /* Use face_temp as face input from here on: */ 4438 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4439} 4440 4441static void 4442emit_edgeflags(struct st_translate *t) 4443{ 4444 struct ureg_program *ureg = t->ureg; 4445 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4446 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4447 4448 ureg_MOV(ureg, edge_dst, edge_src); 4449} 4450 4451/** 4452 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4453 * \param program the program to translate 4454 * \param numInputs number of input registers used 4455 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4456 * input indexes 4457 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4458 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4459 * each input 4460 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4461 * \param numOutputs number of output registers used 4462 * \param outputMapping maps Mesa fragment program outputs to TGSI 4463 * generic outputs 4464 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4465 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4466 * each output 4467 * 4468 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4469 */ 4470extern "C" enum pipe_error 4471st_translate_program( 4472 struct gl_context *ctx, 4473 uint procType, 4474 struct ureg_program *ureg, 4475 glsl_to_tgsi_visitor *program, 4476 const struct gl_program *proginfo, 4477 GLuint 
numInputs, 4478 const GLuint inputMapping[], 4479 const ubyte inputSemanticName[], 4480 const ubyte inputSemanticIndex[], 4481 const GLuint interpMode[], 4482 GLuint numOutputs, 4483 const GLuint outputMapping[], 4484 const ubyte outputSemanticName[], 4485 const ubyte outputSemanticIndex[], 4486 boolean passthrough_edgeflags, 4487 boolean clamp_color) 4488{ 4489 struct st_translate *t; 4490 unsigned i; 4491 enum pipe_error ret = PIPE_OK; 4492 4493 assert(numInputs <= Elements(t->inputs)); 4494 assert(numOutputs <= Elements(t->outputs)); 4495 4496 t = CALLOC_STRUCT(st_translate); 4497 if (!t) { 4498 ret = PIPE_ERROR_OUT_OF_MEMORY; 4499 goto out; 4500 } 4501 4502 memset(t, 0, sizeof *t); 4503 4504 t->procType = procType; 4505 t->inputMapping = inputMapping; 4506 t->outputMapping = outputMapping; 4507 t->ureg = ureg; 4508 4509 if (program->shader_program) { 4510 for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { 4511 struct gl_uniform_storage *const storage = 4512 &program->shader_program->UniformStorage[i]; 4513 4514 _mesa_uniform_detach_all_driver_storage(storage); 4515 } 4516 } 4517 4518 /* 4519 * Declare input attributes. 4520 */ 4521 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4522 for (i = 0; i < numInputs; i++) { 4523 t->inputs[i] = ureg_DECL_fs_input(ureg, 4524 inputSemanticName[i], 4525 inputSemanticIndex[i], 4526 interpMode[i]); 4527 } 4528 4529 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4530 /* Must do this after setting up t->inputs, and before 4531 * emitting constant references, below: 4532 */ 4533 emit_wpos(st_context(ctx), t, proginfo, ureg); 4534 } 4535 4536 if (proginfo->InputsRead & FRAG_BIT_FACE) 4537 emit_face_var(t); 4538 4539 /* 4540 * Declare output attributes. 
4541 */ 4542 for (i = 0; i < numOutputs; i++) { 4543 switch (outputSemanticName[i]) { 4544 case TGSI_SEMANTIC_POSITION: 4545 t->outputs[i] = ureg_DECL_output(ureg, 4546 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4547 outputSemanticIndex[i]); 4548 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4549 break; 4550 case TGSI_SEMANTIC_STENCIL: 4551 t->outputs[i] = ureg_DECL_output(ureg, 4552 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4553 outputSemanticIndex[i]); 4554 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4555 break; 4556 case TGSI_SEMANTIC_COLOR: 4557 t->outputs[i] = ureg_DECL_output(ureg, 4558 TGSI_SEMANTIC_COLOR, 4559 outputSemanticIndex[i]); 4560 break; 4561 default: 4562 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4563 ret = PIPE_ERROR_BAD_INPUT; 4564 goto out; 4565 } 4566 } 4567 } 4568 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4569 for (i = 0; i < numInputs; i++) { 4570 t->inputs[i] = ureg_DECL_gs_input(ureg, 4571 i, 4572 inputSemanticName[i], 4573 inputSemanticIndex[i]); 4574 } 4575 4576 for (i = 0; i < numOutputs; i++) { 4577 t->outputs[i] = ureg_DECL_output(ureg, 4578 outputSemanticName[i], 4579 outputSemanticIndex[i]); 4580 } 4581 } 4582 else { 4583 assert(procType == TGSI_PROCESSOR_VERTEX); 4584 4585 for (i = 0; i < numInputs; i++) { 4586 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4587 } 4588 4589 for (i = 0; i < numOutputs; i++) { 4590 if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) { 4591 int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW; 4592 t->outputs[i] = ureg_DECL_output_masked(ureg, 4593 outputSemanticName[i], 4594 outputSemanticIndex[i], 4595 mask); 4596 } else { 4597 t->outputs[i] = ureg_DECL_output(ureg, 4598 outputSemanticName[i], 4599 outputSemanticIndex[i]); 4600 } 4601 } 4602 if (passthrough_edgeflags) 4603 emit_edgeflags(t); 4604 } 4605 4606 /* Declare address register. 
4607 */ 4608 if (program->num_address_regs > 0) { 4609 assert(program->num_address_regs == 1); 4610 t->address[0] = ureg_DECL_address(ureg); 4611 } 4612 4613 /* Declare misc input registers 4614 */ 4615 { 4616 GLbitfield sysInputs = proginfo->SystemValuesRead; 4617 unsigned numSys = 0; 4618 for (i = 0; sysInputs; i++) { 4619 if (sysInputs & (1 << i)) { 4620 unsigned semName = mesa_sysval_to_semantic[i]; 4621 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4622 numSys++; 4623 sysInputs &= ~(1 << i); 4624 } 4625 } 4626 } 4627 4628 if (program->indirect_addr_temps) { 4629 /* If temps are accessed with indirect addressing, declare temporaries 4630 * in sequential order. Else, we declare them on demand elsewhere. 4631 * (Note: the number of temporaries is equal to program->next_temp) 4632 */ 4633 for (i = 0; i < (unsigned)program->next_temp; i++) { 4634 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4635 t->temps[i] = ureg_DECL_local_temporary(t->ureg); 4636 } 4637 } 4638 4639 /* Emit constants and uniforms. TGSI uses a single index space for these, 4640 * so we put all the translated regs in t->constants. 4641 */ 4642 if (proginfo->Parameters) { 4643 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4644 if (t->constants == NULL) { 4645 ret = PIPE_ERROR_OUT_OF_MEMORY; 4646 goto out; 4647 } 4648 4649 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4650 switch (proginfo->Parameters->Parameters[i].Type) { 4651 case PROGRAM_ENV_PARAM: 4652 case PROGRAM_LOCAL_PARAM: 4653 case PROGRAM_STATE_VAR: 4654 case PROGRAM_NAMED_PARAM: 4655 case PROGRAM_UNIFORM: 4656 t->constants[i] = ureg_DECL_constant(ureg, i); 4657 break; 4658 4659 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4660 * addressing of the const buffer. 4661 * FIXME: Be smarter and recognize param arrays: 4662 * indirect addressing is only valid within the referenced 4663 * array. 
4664 */ 4665 case PROGRAM_CONSTANT: 4666 if (program->indirect_addr_consts) 4667 t->constants[i] = ureg_DECL_constant(ureg, i); 4668 else 4669 t->constants[i] = emit_immediate(t, 4670 proginfo->Parameters->ParameterValues[i], 4671 proginfo->Parameters->Parameters[i].DataType, 4672 4); 4673 break; 4674 default: 4675 break; 4676 } 4677 } 4678 } 4679 4680 /* Emit immediate values. 4681 */ 4682 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4683 if (t->immediates == NULL) { 4684 ret = PIPE_ERROR_OUT_OF_MEMORY; 4685 goto out; 4686 } 4687 i = 0; 4688 foreach_iter(exec_list_iterator, iter, program->immediates) { 4689 immediate_storage *imm = (immediate_storage *)iter.get(); 4690 assert(i < program->num_immediates); 4691 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4692 } 4693 assert(i == program->num_immediates); 4694 4695 /* texture samplers */ 4696 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4697 if (program->samplers_used & (1 << i)) { 4698 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4699 } 4700 } 4701 4702 /* Emit each instruction in turn: 4703 */ 4704 foreach_iter(exec_list_iterator, iter, program->instructions) { 4705 set_insn_start(t, ureg_get_instruction_number(ureg)); 4706 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(), 4707 clamp_color); 4708 } 4709 4710 /* Fix up all emitted labels: 4711 */ 4712 for (i = 0; i < t->labels_count; i++) { 4713 ureg_fixup_label(ureg, t->labels[i].token, 4714 t->insn[t->labels[i].branch_target]); 4715 } 4716 4717 if (program->shader_program) { 4718 /* This has to be done last. Any operation the can cause 4719 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4720 * program constant) has to happen before creating this linkage. 
4721 */ 4722 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4723 if (program->shader_program->_LinkedShaders[i] == NULL) 4724 continue; 4725 4726 _mesa_associate_uniform_storage(ctx, program->shader_program, 4727 program->shader_program->_LinkedShaders[i]->Program->Parameters); 4728 } 4729 } 4730 4731out: 4732 if (t) { 4733 FREE(t->insn); 4734 FREE(t->labels); 4735 FREE(t->constants); 4736 FREE(t->immediates); 4737 4738 if (t->error) { 4739 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4740 } 4741 4742 FREE(t); 4743 } 4744 4745 return ret; 4746} 4747/* ----------------------------- End TGSI code ------------------------------ */ 4748 4749/** 4750 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4751 * generating Mesa IR. 4752 */ 4753static struct gl_program * 4754get_mesa_program(struct gl_context *ctx, 4755 struct gl_shader_program *shader_program, 4756 struct gl_shader *shader, 4757 int num_clip_distances) 4758{ 4759 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4760 struct gl_program *prog; 4761 GLenum target; 4762 const char *target_string; 4763 bool progress; 4764 struct gl_shader_compiler_options *options = 4765 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4766 4767 switch (shader->Type) { 4768 case GL_VERTEX_SHADER: 4769 target = GL_VERTEX_PROGRAM_ARB; 4770 target_string = "vertex"; 4771 break; 4772 case GL_FRAGMENT_SHADER: 4773 target = GL_FRAGMENT_PROGRAM_ARB; 4774 target_string = "fragment"; 4775 break; 4776 case GL_GEOMETRY_SHADER: 4777 target = GL_GEOMETRY_PROGRAM_NV; 4778 target_string = "geometry"; 4779 break; 4780 default: 4781 assert(!"should not be reached"); 4782 return NULL; 4783 } 4784 4785 validate_ir_tree(shader->ir); 4786 4787 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4788 if (!prog) 4789 return NULL; 4790 prog->Parameters = _mesa_new_parameter_list(); 4791 v->ctx = ctx; 4792 v->prog = prog; 4793 v->shader_program = shader_program; 4794 
v->options = options; 4795 v->glsl_version = ctx->Const.GLSLVersion; 4796 v->native_integers = ctx->Const.NativeIntegers; 4797 v->num_clip_distances = num_clip_distances; 4798 4799 _mesa_generate_parameters_list_for_uniforms(shader_program, shader, 4800 prog->Parameters); 4801 4802 /* Remove reads from output registers. */ 4803 lower_output_reads(shader->ir); 4804 4805 /* Emit intermediate IR for main(). */ 4806 visit_exec_list(shader->ir, v); 4807 4808 /* Now emit bodies for any functions that were used. */ 4809 do { 4810 progress = GL_FALSE; 4811 4812 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4813 function_entry *entry = (function_entry *)iter.get(); 4814 4815 if (!entry->bgn_inst) { 4816 v->current_function = entry; 4817 4818 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4819 entry->bgn_inst->function = entry; 4820 4821 visit_exec_list(&entry->sig->body, v); 4822 4823 glsl_to_tgsi_instruction *last; 4824 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4825 if (last->op != TGSI_OPCODE_RET) 4826 v->emit(NULL, TGSI_OPCODE_RET); 4827 4828 glsl_to_tgsi_instruction *end; 4829 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4830 end->function = entry; 4831 4832 progress = GL_TRUE; 4833 } 4834 } 4835 } while (progress); 4836 4837#if 0 4838 /* Print out some information (for debugging purposes) used by the 4839 * optimization passes. */ 4840 for (i=0; i < v->next_temp; i++) { 4841 int fr = v->get_first_temp_read(i); 4842 int fw = v->get_first_temp_write(i); 4843 int lr = v->get_last_temp_read(i); 4844 int lw = v->get_last_temp_write(i); 4845 4846 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4847 assert(fw <= fr); 4848 } 4849#endif 4850 4851 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
*/ 4852 v->simplify_cmp(); 4853 v->copy_propagate(); 4854 while (v->eliminate_dead_code_advanced()); 4855 4856 /* FIXME: These passes to optimize temporary registers don't work when there 4857 * is indirect addressing of the temporary register space. We need proper 4858 * array support so that we don't have to give up these passes in every 4859 * shader that uses arrays. 4860 */ 4861 if (!v->indirect_addr_temps) { 4862 v->eliminate_dead_code(); 4863 v->merge_registers(); 4864 v->renumber_registers(); 4865 } 4866 4867 /* Write the END instruction. */ 4868 v->emit(NULL, TGSI_OPCODE_END); 4869 4870 if (ctx->Shader.Flags & GLSL_DUMP) { 4871 printf("\n"); 4872 printf("GLSL IR for linked %s program %d:\n", target_string, 4873 shader_program->Name); 4874 _mesa_print_ir(shader->ir, NULL); 4875 printf("\n"); 4876 printf("\n"); 4877 fflush(stdout); 4878 } 4879 4880 prog->Instructions = NULL; 4881 prog->NumInstructions = 0; 4882 4883 do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); 4884 count_resources(v, prog); 4885 4886 _mesa_reference_program(ctx, &shader->Program, prog); 4887 4888 /* This has to be done last. Any operation the can cause 4889 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4890 * program constant) has to happen before creating this linkage. 
4891 */ 4892 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); 4893 if (!shader_program->LinkStatus) { 4894 return NULL; 4895 } 4896 4897 struct st_vertex_program *stvp; 4898 struct st_fragment_program *stfp; 4899 struct st_geometry_program *stgp; 4900 4901 switch (shader->Type) { 4902 case GL_VERTEX_SHADER: 4903 stvp = (struct st_vertex_program *)prog; 4904 stvp->glsl_to_tgsi = v; 4905 break; 4906 case GL_FRAGMENT_SHADER: 4907 stfp = (struct st_fragment_program *)prog; 4908 stfp->glsl_to_tgsi = v; 4909 break; 4910 case GL_GEOMETRY_SHADER: 4911 stgp = (struct st_geometry_program *)prog; 4912 stgp->glsl_to_tgsi = v; 4913 break; 4914 default: 4915 assert(!"should not be reached"); 4916 return NULL; 4917 } 4918 4919 return prog; 4920} 4921 4922/** 4923 * Searches through the IR for a declaration of gl_ClipDistance and returns the 4924 * declared size of the gl_ClipDistance array. Returns 0 if gl_ClipDistance is 4925 * not declared in the IR. 4926 */ 4927int get_clip_distance_size(exec_list *ir) 4928{ 4929 foreach_iter (exec_list_iterator, iter, *ir) { 4930 ir_instruction *inst = (ir_instruction *)iter.get(); 4931 ir_variable *var = inst->as_variable(); 4932 if (var == NULL) continue; 4933 if (!strcmp(var->name, "gl_ClipDistance")) { 4934 return var->type->length; 4935 } 4936 } 4937 4938 return 0; 4939} 4940 4941extern "C" { 4942 4943struct gl_shader * 4944st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4945{ 4946 struct gl_shader *shader; 4947 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4948 type == GL_GEOMETRY_SHADER_ARB); 4949 shader = rzalloc(NULL, struct gl_shader); 4950 if (shader) { 4951 shader->Type = type; 4952 shader->Name = name; 4953 _mesa_init_shader(ctx, shader); 4954 } 4955 return shader; 4956} 4957 4958struct gl_shader_program * 4959st_new_shader_program(struct gl_context *ctx, GLuint name) 4960{ 4961 struct gl_shader_program *shProg; 4962 shProg = rzalloc(NULL, struct gl_shader_program); 4963 
if (shProg) { 4964 shProg->Name = name; 4965 _mesa_init_shader_program(ctx, shProg); 4966 } 4967 return shProg; 4968} 4969 4970/** 4971 * Link a shader. 4972 * Called via ctx->Driver.LinkShader() 4973 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 4974 * with code lowering and other optimizations. 4975 */ 4976GLboolean 4977st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4978{ 4979 int num_clip_distances[MESA_SHADER_TYPES]; 4980 assert(prog->LinkStatus); 4981 4982 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4983 if (prog->_LinkedShaders[i] == NULL) 4984 continue; 4985 4986 bool progress; 4987 exec_list *ir = prog->_LinkedShaders[i]->ir; 4988 const struct gl_shader_compiler_options *options = 4989 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 4990 4991 /* We have to determine the length of the gl_ClipDistance array before 4992 * the array is lowered to two vec4s by lower_clip_distance(). 
4993 */ 4994 num_clip_distances[i] = get_clip_distance_size(ir); 4995 4996 do { 4997 unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP | 4998 EXP_TO_EXP2 | LOG_TO_LOG2; 4999 if (options->EmitNoPow) 5000 what_to_lower |= POW_TO_EXP2; 5001 if (!ctx->Const.NativeIntegers) 5002 what_to_lower |= INT_DIV_TO_MUL_RCP; 5003 5004 progress = false; 5005 5006 /* Lowering */ 5007 do_mat_op_to_vec(ir); 5008 lower_instructions(ir, what_to_lower); 5009 5010 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 5011 5012 progress = do_common_optimization(ir, true, true, 5013 options->MaxUnrollIterations) 5014 || progress; 5015 5016 progress = lower_quadop_vector(ir, false) || progress; 5017 progress = lower_clip_distance(ir) || progress; 5018 5019 if (options->MaxIfDepth == 0) 5020 progress = lower_discard(ir) || progress; 5021 5022 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; 5023 5024 if (options->EmitNoNoise) 5025 progress = lower_noise(ir) || progress; 5026 5027 /* If there are forms of indirect addressing that the driver 5028 * cannot handle, perform the lowering pass. 
5029 */ 5030 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 5031 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 5032 progress = 5033 lower_variable_index_to_cond_assign(ir, 5034 options->EmitNoIndirectInput, 5035 options->EmitNoIndirectOutput, 5036 options->EmitNoIndirectTemp, 5037 options->EmitNoIndirectUniform) 5038 || progress; 5039 5040 progress = do_vec_index_to_cond_assign(ir) || progress; 5041 } while (progress); 5042 5043 validate_ir_tree(ir); 5044 } 5045 5046 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5047 struct gl_program *linked_prog; 5048 5049 if (prog->_LinkedShaders[i] == NULL) 5050 continue; 5051 5052 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i], 5053 num_clip_distances[i]); 5054 5055 if (linked_prog) { 5056 static const GLenum targets[] = { 5057 GL_VERTEX_PROGRAM_ARB, 5058 GL_FRAGMENT_PROGRAM_ARB, 5059 GL_GEOMETRY_PROGRAM_NV 5060 }; 5061 5062 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5063 linked_prog); 5064 if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { 5065 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5066 NULL); 5067 _mesa_reference_program(ctx, &linked_prog, NULL); 5068 return GL_FALSE; 5069 } 5070 } 5071 5072 _mesa_reference_program(ctx, &linked_prog, NULL); 5073 } 5074 5075 return GL_TRUE; 5076} 5077 5078void 5079st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, 5080 const GLuint outputMapping[], 5081 struct pipe_stream_output_info *so) 5082{ 5083 unsigned i; 5084 struct gl_transform_feedback_info *info = 5085 &glsl_to_tgsi->shader_program->LinkedTransformFeedback; 5086 5087 for (i = 0; i < info->NumOutputs; i++) { 5088 so->output[i].register_index = 5089 outputMapping[info->Outputs[i].OutputRegister]; 5090 so->output[i].start_component = info->Outputs[i].ComponentOffset; 5091 so->output[i].num_components = info->Outputs[i].NumComponents; 5092 so->output[i].output_buffer = 
info->Outputs[i].OutputBuffer; 5093 so->output[i].dst_offset = info->Outputs[i].DstOffset; 5094 } 5095 5096 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 5097 so->stride[i] = info->BufferStride[i]; 5098 } 5099 so->num_outputs = info->NumOutputs; 5100} 5101 5102} /* extern "C" */ 5103