st_glsl_to_tgsi.cpp revision e16b0a51be7866f3856b62b295df2bcf49e02384
1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov/*
2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
4ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Copyright © 2010 Intel Corporation
5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Copyright © 2011 Bryan Cain
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *
7ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Permission is hereby granted, free of charge, to any person obtaining a
8ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * copy of this software and associated documentation files (the "Software"),
9ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * to deal in the Software without restriction, including without limitation
104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the rights to use, copy, modify, merge, publish, distribute, sublicense,
114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * and/or sell copies of the Software, and to permit persons to whom the
124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Software is furnished to do so, subject to the following conditions:
134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann *
144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * The above copyright notice and this permission notice (including the next
154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * paragraph) shall be included in all copies or substantial portions of the
164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Software.
174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann *
184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * DEALINGS IN THE SOFTWARE.
254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */
264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
/**
 * \file st_glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */
324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include <stdio.h>
34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "main/compiler.h"
35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ir.h"
36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ir_visitor.h"
374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "ir_print_visitor.h"
38ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ir_expression_flattening.h"
39ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "glsl_types.h"
404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "glsl_parser_extras.h"
414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "../glsl/program.h"
424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "ir_optimization.h"
43ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "ast.h"
444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
45ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "main/mtypes.h"
46ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "main/shaderobj.h"
475ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/hash_table.h"
485ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
495ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannextern "C" {
505ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "main/shaderapi.h"
515ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "main/uniforms.h"
525ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_instruction.h"
535ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_optimize.h"
545ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_print.h"
555ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/program.h"
565ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/prog_parameter.h"
575ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "program/sampler.h"
585ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
595ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_compiler.h"
605ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_context.h"
615ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_screen.h"
625ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann#include "pipe/p_shader_tokens.h"
634d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "pipe/p_state.h"
644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "util/u_math.h"
654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "tgsi/tgsi_ureg.h"
66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "tgsi/tgsi_info.h"
674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "st_context.h"
68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "st_program.h"
694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "st_glsl_to_tgsi.h"
704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann#include "st_mesa_to_tgsi.h"
71ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
/* Register-file value used in this file for immediates; it aliases
 * PROGRAM_FILE_MAX (presumably one past the last real Mesa register
 * file -- confirm against the gl_register_file enum). */
#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX

/* Bitmask with one bit set (1 << PROGRAM_*) for each register file listed
 * below: local/env/named parameters, state variables, constants and
 * uniforms. */
#define PROGRAM_ANY_CONST          \
   ((1 << PROGRAM_LOCAL_PARAM) |   \
    (1 << PROGRAM_ENV_PARAM)   |   \
    (1 << PROGRAM_STATE_VAR)   |   \
    (1 << PROGRAM_NAMED_PARAM) |   \
    (1 << PROGRAM_CONSTANT)    |   \
    (1 << PROGRAM_UNIFORM))
804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
/**
 * Upper bound on the number of temporary registers.
 *
 * Arrays of this many elements must not live on the stack: that would
 * overflow the stack on Windows and likely on Mac OS X as well.
 */
#define MAX_TEMPS 4096

/* Number of texture offsets stored per instruction; will be 4 for
 * GLSL 4.00. */
#define MAX_GLSL_TEXTURE_OFFSET 1
914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
/* Forward declarations: each register class declares a converting
 * constructor that takes the other one, and st_src_reg's constructor calls
 * swizzle_for_size() before that function is defined. */
class st_src_reg;
class st_dst_reg;

static int swizzle_for_size(int size);
964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/**
984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * This struct is a corresponding struct to TGSI ureg_src.
994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */
100ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass st_src_reg {
1014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic:
1024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg(gl_register_file file, int index, const glsl_type *type)
1034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   {
1044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->file = file;
1054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->index = index;
1064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
1074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         this->swizzle = swizzle_for_size(type->vector_elements);
1084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      else
1094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         this->swizzle = SWIZZLE_XYZW;
1104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->negate = 0;
1114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
1124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->reladdr = NULL;
1135ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   }
1144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg(gl_register_file file, int index, int type)
1164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   {
1174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->type = type;
1184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->file = file;
1194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->index = index;
1204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->swizzle = SWIZZLE_XYZW;
1214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->negate = 0;
1224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->reladdr = NULL;
1234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
1244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg()
1264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   {
1274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->type = GLSL_TYPE_ERROR;
1284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->file = PROGRAM_UNDEFINED;
1294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->index = 0;
1304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->swizzle = 0;
1314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->negate = 0;
1324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->reladdr = NULL;
1334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
134ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
135ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   explicit st_src_reg(st_dst_reg reg);
1364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   gl_register_file file; /**< PROGRAM_* from Mesa */
1384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
1394d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
1404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int negate; /**< NEGATE_XYZW mask from mesa */
1414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
1424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /** Register index should be offset by the integer in this reg. */
1434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg *reladdr;
1444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann};
1454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass st_dst_reg {
1474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic:
1484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_dst_reg(gl_register_file file, int writemask, int type)
1494d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   {
1504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->file = file;
1514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->index = 0;
1524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->writemask = writemask;
1534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->cond_mask = COND_TR;
1544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->reladdr = NULL;
1554d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->type = type;
1564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
157ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
158ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_dst_reg()
1594d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   {
1604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->type = GLSL_TYPE_ERROR;
1614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->file = PROGRAM_UNDEFINED;
1624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->index = 0;
1634d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->writemask = 0;
1644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->cond_mask = COND_TR;
1654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->reladdr = NULL;
1664d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
1674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   explicit st_dst_reg(st_src_reg reg);
1694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   gl_register_file file; /**< PROGRAM_* from Mesa */
1714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
1724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
173ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   GLuint cond_mask:4;
174ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
175ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /** Register index should be offset by the integer in this reg. */
1764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg *reladdr;
1774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann};
1784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannst_src_reg::st_src_reg(st_dst_reg reg)
1804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
1814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->type = reg.type;
1824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->file = reg.file;
1834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->index = reg.index;
1844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->swizzle = SWIZZLE_XYZW;
1854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->negate = 0;
1864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->reladdr = reg.reladdr;
1874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
1884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannst_dst_reg::st_dst_reg(st_src_reg reg)
1904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
1914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->type = reg.type;
1924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->file = reg.file;
1934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->index = reg.index;
1944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->writemask = WRITEMASK_XYZW;
1954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->cond_mask = COND_TR;
1964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->reladdr = reg.reladdr;
1974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
1984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass glsl_to_tgsi_instruction : public exec_node {
2004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic:
201ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /* Callers of this ralloc-based new need not call delete. It's
202ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
203ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   static void* operator new(size_t size, void *ctx)
204ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   {
205ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      void *node;
2064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
207ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      node = rzalloc_size(ctx, size);
208ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      assert(node != NULL);
209ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
210ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return node;
211ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
212ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
213ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   unsigned op;
214ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_dst_reg dst;
215e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   st_src_reg src[3];
216e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   /** Pointer to the ir source this tree came from for debugging */
2174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   ir_instruction *ir;
218ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   GLboolean cond_update;
219ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   bool saturate;
220ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int sampler; /**< sampler index */
2214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int tex_target; /**< One of TEXTURE_*_INDEX */
2224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   GLboolean tex_shadow;
2234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
2244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   unsigned tex_offset_num_offset;
2254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int dead_mask; /**< Used in dead code elimination */
2264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
2274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
228ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann};
2294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
230ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass variable_storage : public exec_node {
231ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannpublic:
232ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   variable_storage(ir_variable *var, gl_register_file file, int index)
233ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      : file(file), index(index), var(var)
2344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   {
235ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      /* empty */
2364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
237ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
238ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   gl_register_file file;
239ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int index;
240ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   ir_variable *var; /* variable that maps to this, if any */
241ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann};
242ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
243ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass immediate_storage : public exec_node {
244ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannpublic:
245ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   immediate_storage(gl_constant_value *values, int size, int type)
246ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   {
247ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      memcpy(this->values, values, size * sizeof(gl_constant_value));
248ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      this->size = size;
249ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      this->type = type;
250ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
251ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
252ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   gl_constant_value values[4];
253ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int size; /**< Number of components (1-4) */
254ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
255ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann};
256ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
257ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass function_entry : public exec_node {
2584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic:
2594d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   ir_function_signature *sig;
2604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
261ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /**
2624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * identifier of this function signature used by the program.
263ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    *
264ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * At the point that TGSI instructions for function calls are
265ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * generated, we don't know the address of the first instruction of
266ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * the function body.  So we make the BranchTarget that is called a
2674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * small integer and rewrite them during set_branchtargets().
268ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    */
2694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int sig_id;
270ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
271ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /**
2724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * Pointer to first instruction of the function body.
273ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    *
2744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * Set during function body emits after main() is processed.
275ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    */
276ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   glsl_to_tgsi_instruction *bgn_inst;
277ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
278ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /**
279ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * Index of the first instruction of the function body in actual TGSI.
2804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    *
2814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
2824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    */
2834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int inst;
2844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
2854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /** Storage for the return value. */
2864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg return_reg;
2874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann};
2884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
2894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass glsl_to_tgsi_visitor : public ir_visitor {
2904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannpublic:
2914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   glsl_to_tgsi_visitor();
2924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   ~glsl_to_tgsi_visitor();
2934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
2944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   function_entry *current_function;
295ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
2964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   struct gl_context *ctx;
2974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   struct gl_program *prog;
2984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   struct gl_shader_program *shader_program;
299ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   struct gl_shader_compiler_options *options;
300ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
301ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int next_temp;
3024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
303ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int num_address_regs;
3044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int samplers_used;
3054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   bool indirect_addr_temps;
3064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   bool indirect_addr_consts;
3074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int num_clip_distances;
3084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
3094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int glsl_version;
310ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   bool native_integers;
311ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
3124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   variable_storage *find_variable_storage(ir_variable *var);
3134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
3144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int add_constant(gl_register_file file, gl_constant_value values[4],
3154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                    int size, int datatype, GLuint *swizzle_out);
3164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
317ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   function_entry *get_function_signature(ir_function_signature *sig);
3184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
319ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg get_temp(const glsl_type *type);
3204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
3214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
322ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg st_src_reg_for_float(float val);
323ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg st_src_reg_for_int(int val);
3244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg st_src_reg_for_type(int type, int val);
3254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
3264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /**
3274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * \name Visit methods
3284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    *
3294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * As typical for the visitor pattern, there must be one \c visit method for
3304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * each concrete subclass of \c ir_instruction.  Virtual base classes within
3314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * the hierarchy should not have \c visit methods.
3324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    */
3334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /*@{*/
3344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   virtual void visit(ir_variable *);
3354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   virtual void visit(ir_loop *);
336ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_loop_jump *);
3374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   virtual void visit(ir_function_signature *);
338e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   virtual void visit(ir_function *);
339ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_expression *);
340ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_swizzle *);
341ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_dereference_variable  *);
342ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_dereference_array *);
343ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_dereference_record *);
344ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_assignment *);
345ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_constant *);
346ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_call *);
347ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_return *);
348ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_discard *);
349ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_texture *);
350ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   virtual void visit(ir_if *);
351e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   /*@}*/
352ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
353ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg result;
354ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
355ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /** List of variable_storage */
356ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   exec_list variables;
3574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
358ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /** List of immediate_storage */
359ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   exec_list immediates;
360e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   unsigned num_immediates;
361ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
362ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /** List of function_entry */
363ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   exec_list function_signatures;
364ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int next_signature_id;
3654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
3664d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /** List of glsl_to_tgsi_instruction */
3674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   exec_list instructions;
3684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
3694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
370ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
371ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
3724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        		        st_dst_reg dst, st_src_reg src0);
3734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
374ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
375ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
376ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
377ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
378ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		        st_dst_reg dst,
379ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
380ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
381ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   unsigned get_opcode(ir_instruction *ir, unsigned op,
3824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                    st_dst_reg dst,
3835ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann                    st_src_reg src0, st_src_reg src1);
3845ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
3855ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   /**
3865ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann    * Emit the correct dot-product instruction for the type of arguments
3875ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann    */
3885ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
3895ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann                                     st_dst_reg dst,
3905ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann                                     st_src_reg src0,
3915ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann                                     st_src_reg src1,
3925ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann                                     unsigned elements);
3935ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
3945ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   void emit_scalar(ir_instruction *ir, unsigned op,
3955ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann        	    st_dst_reg dst, st_src_reg src0);
3965ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
3975ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   void emit_scalar(ir_instruction *ir, unsigned op,
3985ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
3995ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
4005ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
4015ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
4025ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
4034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
4044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void emit_scs(ir_instruction *ir, unsigned op,
4054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        	 st_dst_reg dst, const st_src_reg &src);
406ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
407ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   bool try_emit_mad(ir_expression *ir,
408ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann              int mul_operand);
409ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   bool try_emit_mad_for_and_not(ir_expression *ir,
410ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann              int mul_operand);
411ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   bool try_emit_sat(ir_expression *ir);
4124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
4134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void emit_swz(ir_expression *ir);
4144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
415ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   bool process_move_condition(ir_rvalue *ir);
4164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
4174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void simplify_cmp(void);
418ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
4194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void rename_temp_register(int index, int new_index);
420ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int get_first_temp_read(int index);
421ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int get_first_temp_write(int index);
422ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int get_last_temp_read(int index);
423ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int get_last_temp_write(int index);
424ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
4254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void copy_propagate(void);
4264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   void eliminate_dead_code(void);
427e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   int eliminate_dead_code_advanced(void);
428e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   void merge_registers(void);
429e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   void renumber_registers(void);
430e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
431ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   void *mem_ctx;
432ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann};
433e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
434e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
435ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
436ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
437ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
438ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
439ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
4404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic void
4414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannfail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
442ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
4434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic void
4444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannfail_link(struct gl_shader_program *prog, const char *fmt, ...)
4454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
4464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   va_list args;
4474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   va_start(args, fmt);
448ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
449ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   va_end(args);
4504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
451ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   prog->LinkStatus = GL_FALSE;
452ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
4534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
454ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic int
455ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannswizzle_for_size(int size)
4564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
457ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int size_swizzles[4] = {
458ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
459ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
460ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
461ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
462ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   };
463ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
464ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   assert((size >= 1) && (size <= 4));
465ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return size_swizzles[size - 1];
466ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
467ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
4684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic bool
4694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannis_tex_instruction(unsigned opcode)
4704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
4714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
4724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   return info->is_tex;
4734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
4744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
4754d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic unsigned
4764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannnum_inst_dst_regs(unsigned opcode)
4774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
4784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
4794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   return info->num_dst;
4804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
4814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
4824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannstatic unsigned
4834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannnum_inst_src_regs(unsigned opcode)
4844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
4854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
4864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   return info->is_tex ? info->num_src - 1 : info->num_src;
4874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
488ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
489ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_instruction *
490ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
491ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		 st_dst_reg dst,
4924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
4934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
4944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
495ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int num_reladdr = 0, i;
496ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
4974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   op = get_opcode(ir, op, dst, src0, src1);
498ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
499ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   /* If we have to do relative addressing, we want to load the ARL
5004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * reg directly for one of the regs, and preload the other reladdr
5014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * sources into temps.
5024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    */
5034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   num_reladdr += dst.reladdr != NULL;
5044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   num_reladdr += src0.reladdr != NULL;
5054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   num_reladdr += src1.reladdr != NULL;
506ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   num_reladdr += src2.reladdr != NULL;
5074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
508e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   reladdr_to_temp(ir, &src2, &num_reladdr);
509e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   reladdr_to_temp(ir, &src1, &num_reladdr);
510ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   reladdr_to_temp(ir, &src0, &num_reladdr);
511ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
512ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (dst.reladdr) {
513ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      emit_arl(ir, address_reg, *dst.reladdr);
5144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      num_reladdr--;
515ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
516ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   assert(num_reladdr == 0);
5174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
518ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   inst->op = op;
5194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   inst->dst = dst;
520ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   inst->src[0] = src0;
521ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   inst->src[1] = src1;
522ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   inst->src[2] = src2;
5234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   inst->ir = ir;
524ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   inst->dead_mask = 0;
5254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
526ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   inst->function = NULL;
5274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
528ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
5294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      this->num_address_regs = 1;
530ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
5314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /* Update indirect addressing status used by TGSI */
532ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (dst.reladdr) {
5334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      switch(dst.file) {
5344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      case PROGRAM_TEMPORARY:
5354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         this->indirect_addr_temps = true;
536ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         break;
537ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case PROGRAM_LOCAL_PARAM:
538ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case PROGRAM_ENV_PARAM:
5394d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      case PROGRAM_STATE_VAR:
540ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case PROGRAM_NAMED_PARAM:
541ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case PROGRAM_CONSTANT:
542ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case PROGRAM_UNIFORM:
5434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         this->indirect_addr_consts = true;
544ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         break;
5454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      case PROGRAM_IMMEDIATE:
5464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         assert(!"immediates should not have indirect addressing");
5474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         break;
548ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      default:
549ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         break;
5504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      }
5514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
5524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   else {
5534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      for (i=0; i<3; i++) {
5544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         if(inst->src[i].reladdr) {
555ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            switch(inst->src[i].file) {
5564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann            case PROGRAM_TEMPORARY:
5574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann               this->indirect_addr_temps = true;
5584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann               break;
559ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            case PROGRAM_LOCAL_PARAM:
5604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann            case PROGRAM_ENV_PARAM:
561ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            case PROGRAM_STATE_VAR:
562ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            case PROGRAM_NAMED_PARAM:
563ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            case PROGRAM_CONSTANT:
5644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann            case PROGRAM_UNIFORM:
565ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               this->indirect_addr_consts = true;
566ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               break;
567ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            case PROGRAM_IMMEDIATE:
568e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov               assert(!"immediates should not have indirect addressing");
569ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               break;
570ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            default:
571ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               break;
572ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            }
5734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         }
5744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      }
575ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
5764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
5774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->instructions.push_tail(inst);
5784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
5794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (native_integers)
5804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      try_emit_float_set(ir, op, dst);
5814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
5824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   return inst;
583ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
5844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
5854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
586ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_instruction *
587ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
588ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
589ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
590ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return emit(ir, op, dst, src0, src1, undef_src);
5914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
5924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
5934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_instruction *
594ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
5954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        		 st_dst_reg dst, st_src_reg src0)
5964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
5974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   assert(dst.writemask != 0);
5984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   return emit(ir, op, dst, src0, undef_src, undef_src);
5994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
6004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
6014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_instruction *
602ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
6034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
6044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
605ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
606ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
607ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann /**
608ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Emits the code to convert the result of float SET instructions to integers.
609ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */
6104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannvoid
611ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
612ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		 st_dst_reg dst)
613ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
614ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if ((op == TGSI_OPCODE_SEQ ||
6154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        op == TGSI_OPCODE_SNE ||
616ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        op == TGSI_OPCODE_SGE ||
617ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        op == TGSI_OPCODE_SLT))
618ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   {
6194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      st_src_reg src = st_src_reg(dst);
620ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      src.negate = ~src.negate;
621ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      dst.type = GLSL_TYPE_FLOAT;
6224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      emit(ir, TGSI_OPCODE_F2I, dst, src);
623ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
624ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
6254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
626ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/**
627ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Determines whether to use an integer, unsigned integer, or float opcode
628ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * based on the operands and input opcode, then emits the result.
6294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */
6304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannunsigned
631ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
632ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		 st_dst_reg dst,
633ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		 st_src_reg src0, st_src_reg src1)
6344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
635ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int type = GLSL_TYPE_FLOAT;
6364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
637ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
6384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      type = GLSL_TYPE_FLOAT;
639ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   else if (native_integers)
640ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
641ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
642ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case4(c, f, i, u) \
6434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   case TGSI_OPCODE_##c: \
644ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
645ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
646ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      else op = TGSI_OPCODE_##f; \
6474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      break;
648ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case3(f, i, u)  case4(f, f, i, u)
649ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case2fi(f, i)   case4(f, f, i, i)
650ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define case2iu(i, u)   case4(i, LAST, i, u)
651ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
652ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   switch(op) {
653ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2fi(ADD, UADD);
654ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2fi(MUL, UMUL);
655ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2fi(MAD, UMAD);
656ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case3(DIV, IDIV, UDIV);
657ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case3(MAX, IMAX, UMAX);
658ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case3(MIN, IMIN, UMIN);
659ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2iu(MOD, UMOD);
660ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
661ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2fi(SEQ, USEQ);
662ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2fi(SNE, USNE);
663ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case3(SGE, ISGE, USGE);
664ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case3(SLT, ISLT, USLT);
6654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
666ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2iu(ISHR, USHR);
667ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
668ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      case2fi(SSG, ISSG);
6694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      case3(ABS, IABS, IABS);
6704d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
6714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      default: break;
672ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
673ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
674ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   assert(op != TGSI_OPCODE_LAST);
675ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return op;
676ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
677ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
678ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_instruction *
679ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
680ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
681ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		    unsigned elements)
682ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
683ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   static const unsigned dot_opcodes[] = {
684ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
685ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   };
686ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
687e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
688ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
689ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
690ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/**
691ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Emits TGSI scalar opcodes to produce unique answers across channels.
692ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *
693ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
694ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * channel determines the result across all channels.  So to do a vec4
695ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * of this operation, we want to emit a scalar per source channel used
696ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * to produce dest channels.
697ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */
698ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
699ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
7004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        		        st_dst_reg dst,
7014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        			st_src_reg orig_src0, st_src_reg orig_src1)
7024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
7034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int i, j;
7044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int done_mask = ~dst.writemask;
7054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
7064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /* TGSI RCP is a scalar operation splatting results to all channels,
7074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
7084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * dst channels.
7094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    */
7104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   for (i = 0; i < 4; i++) {
711ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      GLuint this_mask = (1 << i);
712ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      glsl_to_tgsi_instruction *inst;
7134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      st_src_reg src0 = orig_src0;
7144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      st_src_reg src1 = orig_src1;
7154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
716ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (done_mask & this_mask)
7174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         continue;
718ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
719ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
720ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
7214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      for (j = i + 1; j < 4; j++) {
7224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         /* If there is another enabled component in the destination that is
7234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          * derived from the same inputs, generate its value on this pass as
7244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          * well.
7254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          */
7264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         if (!(done_mask & (1 << j)) &&
7274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann             GET_SWZ(src0.swizzle, j) == src0_swiz &&
7284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann             GET_SWZ(src1.swizzle, j) == src1_swiz) {
7294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann            this_mask |= (1 << j);
7304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         }
7314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      }
7324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
7334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        			   src0_swiz, src0_swiz);
7344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
735ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        			  src1_swiz, src1_swiz);
736ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
737e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov      inst = emit(ir, op, dst, src0, src1);
7384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      inst->dst.writemask = this_mask;
739e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov      done_mask |= this_mask;
7404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
741e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
742ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
743ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
744ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
745ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		        st_dst_reg dst, st_src_reg src0)
746ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
747ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg undef = undef_src;
748ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
749ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   undef.swizzle = SWIZZLE_XXXX;
750ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
751ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   emit_scalar(ir, op, dst, src0, undef);
7524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
753ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
754ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
755ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
756e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        		        st_dst_reg dst, st_src_reg src0)
7574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
7584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int op = TGSI_OPCODE_ARL;
7594d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
7604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
7614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      op = TGSI_OPCODE_UARL;
7624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
763e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   emit(NULL, op, dst, src0);
7644d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
7654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
7664d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann/**
7674d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * Emit an TGSI_OPCODE_SCS instruction
7684d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann *
7694d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
770e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov * Instead of splatting its result across all four components of the
7714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * destination, it writes one value to the \c x component and another value to
7724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * the \c y component.
773e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *
7744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param ir        IR instruction being processed
7754d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
7764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann *                  on which value is desired.
7774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param dst       Destination register
7784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * \param src       Source register
7794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann */
7804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannvoid
781ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
7824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        		     st_dst_reg dst,
7834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        		     const st_src_reg &src)
7844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
7854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /* Vertex programs cannot use the SCS opcode.
7864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    */
7874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
7884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      emit_scalar(ir, op, dst, src);
7894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      return;
7904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
7914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
7924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
7934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   const unsigned scs_mask = (1U << component);
7944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int done_mask = ~dst.writemask;
7955ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   st_src_reg tmp;
7965ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
7974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
7984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
7994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /* If there are compnents in the destination that differ from the component
8004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    * that will be written by the SCS instrution, we'll need a temporary.
8014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann    */
8024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (scs_mask != unsigned(dst.writemask)) {
8034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      tmp = get_temp(glsl_type::vec4_type);
8044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
8054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
8064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   for (unsigned i = 0; i < 4; i++) {
8074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      unsigned this_mask = (1U << i);
8084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      st_src_reg src0 = src;
8094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
810ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if ((done_mask & this_mask) != 0)
811ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         continue;
8124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
8134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      /* The source swizzle specified which component of the source generates
8144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann       * sine / cosine for the current component in the destination.  The SCS
8154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann       * instruction requires that this value be swizzle to the X component.
8164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann       * Replace the current swizzle with a swizzle that puts the source in
8174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann       * the X component.
8184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann       */
8194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
8204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
8214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
8224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        			   src0_swiz, src0_swiz);
8234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      for (unsigned j = i + 1; j < 4; j++) {
8244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         /* If there is another enabled component in the destination that is
8254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          * derived from the same inputs, generate its value on this pass as
8264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          * well.
8274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          */
8284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         if (!(done_mask & (1 << j)) &&
8294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann             GET_SWZ(src0.swizzle, j) == src0_swiz) {
8304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann            this_mask |= (1 << j);
8314d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         }
832ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
8334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
834ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (this_mask != scs_mask) {
835ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         glsl_to_tgsi_instruction *inst;
836ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         st_dst_reg tmp_dst = st_dst_reg(tmp);
837ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
838ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         /* Emit the SCS instruction.
839ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          */
8404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
8414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         inst->dst.writemask = scs_mask;
8424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
8434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         /* Move the result of the SCS instruction to the desired location in
8444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          * the destination.
8454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          */
8464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         tmp.swizzle = MAKE_SWIZZLE4(component, component,
8474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann        			     component, component);
8484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
8494d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         inst->dst.writemask = this_mask;
8504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      } else {
8514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         /* Emit the SCS instruction to write directly to the destination.
8524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          */
8534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
8544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         inst->dst.writemask = scs_mask;
8554d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      }
8564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
8574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      done_mask |= this_mask;
8584d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
859ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
8604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
861ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannint
862ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::add_constant(gl_register_file file,
863ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		     gl_constant_value values[4], int size, int datatype,
864ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		     GLuint *swizzle_out)
865ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
866ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (file == PROGRAM_CONSTANT) {
867ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
868ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                              size, datatype, swizzle_out);
869ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   } else {
870ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      int index = 0;
871ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      immediate_storage *entry;
872ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      assert(file == PROGRAM_IMMEDIATE);
873ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
874ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      /* Search immediate storage to see if we already have an identical
875ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       * immediate that we can use instead of adding a duplicate entry.
876ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       */
877ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      foreach_iter(exec_list_iterator, iter, this->immediates) {
878ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         entry = (immediate_storage *)iter.get();
879ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
880ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         if (entry->size == size &&
881ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann             entry->type == datatype &&
882ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
883ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann             return index;
884ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         }
885ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         index++;
886ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
887ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
888ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      /* Add this immediate to the list. */
8895ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann      entry = new(mem_ctx) immediate_storage(values, size, datatype);
8905ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann      this->immediates.push_tail(entry);
891ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      this->num_immediates++;
892ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return index;
8935ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   }
8945ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann}
8955ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
8965ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannst_src_reg
8975ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannglsl_to_tgsi_visitor::st_src_reg_for_float(float val)
8985ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann{
899ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
900ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   union gl_constant_value uval;
901ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
902ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   uval.f = val;
903ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
904ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
905ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return src;
906ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
907ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
908ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannst_src_reg
909ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::st_src_reg_for_int(int val)
910ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
911ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
912ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   union gl_constant_value uval;
913ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
914ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   assert(native_integers);
915ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
916ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   uval.i = val;
917ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
918ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
919ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return src;
920ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
9215ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
9225ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannst_src_reg
9235ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmannglsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
9245ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann{
9255ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   if (native_integers)
9265ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
9275ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann                                       st_src_reg_for_int(val);
928ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   else
929ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return st_src_reg_for_float(val);
930e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
931ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
932ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic int
933ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmanntype_size(const struct glsl_type *type)
9344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
9354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   unsigned int i;
9364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   int size;
9374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
938ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   switch (type->base_type) {
939e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   case GLSL_TYPE_UINT:
940e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   case GLSL_TYPE_INT:
941ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case GLSL_TYPE_FLOAT:
942ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case GLSL_TYPE_BOOL:
943ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (type->is_matrix()) {
944ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         return type->matrix_columns;
945ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      } else {
946ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         /* Regardless of size of vector, it gets a vec4. This is bad
947ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          * packing for things like floats, but otherwise arrays become a
948ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          * mess.  Hopefully a later pass over the code can pack scalars
949ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          * down if appropriate.
950ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          */
951ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         return 1;
952ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
953ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case GLSL_TYPE_ARRAY:
954ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      assert(type->length > 0);
955ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return type_size(type->fields.array) * type->length;
956ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case GLSL_TYPE_STRUCT:
957ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      size = 0;
958ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      for (i = 0; i < type->length; i++) {
959ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         size += type_size(type->fields.structure[i].type);
960ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
9614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      return size;
962ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case GLSL_TYPE_SAMPLER:
963ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      /* Samplers take up one slot in UNIFORMS[], but they're baked in
964ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       * at link time.
965ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       */
966ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return 1;
967e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   default:
968ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      assert(0);
969ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return 0;
970ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
971ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
972ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
973ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/**
974ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * In the initial pass of codegen, we assign temporary numbers to
975ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * intermediate results.  (not SSA -- variable assignments will reuse
9764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann * storage).
977ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */
9784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannst_src_reg
9794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_visitor::get_temp(const glsl_type *type)
9804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
981ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_src_reg src;
982ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
983ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
984e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   src.file = PROGRAM_TEMPORARY;
985e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   src.index = next_temp;
986ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   src.reladdr = NULL;
9874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   next_temp += type_size(type);
988e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
989e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   if (type->is_array() || type->is_record()) {
990ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      src.swizzle = SWIZZLE_NOOP;
9914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   } else {
992e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov      src.swizzle = swizzle_for_size(type->vector_elements);
993e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   }
994ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   src.negate = 0;
9954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
996ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return src;
997e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
998ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
9994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannvariable_storage *
1000ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1001e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
1002ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
10034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   variable_storage *entry;
1004ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1005e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   foreach_iter(exec_list_iterator, iter, this->variables) {
10064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      entry = (variable_storage *)iter.get();
1007ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1008ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (entry->var == var)
1009ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         return entry;
1010ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
1011ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1012ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return NULL;
1013ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
10144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1015ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
1016ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_variable *ir)
1017ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
1018ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (strcmp(ir->name, "gl_FragCoord") == 0) {
1019ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
10205ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann
10215ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann      fp->OriginUpperLeft = ir->origin_upper_left;
10225ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann      fp->PixelCenterInteger = ir->pixel_center_integer;
10235ae9d0c6fd838a2967cca72aa5751b51dadc2769Philip P. Moltmann   }
1024e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
1025e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1026ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      unsigned int i;
1027ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      const ir_state_slot *const slots = ir->state_slots;
1028ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      assert(ir->state_slots != NULL);
1029ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1030ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      /* Check if this statevar's setup in the STATE file exactly
1031ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       * matches how we'll want to reference it as a
10324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann       * struct/array/whatever.  If not, then we need to move it into
1033ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       * temporary storage and hope that it'll get copy-propagated
1034ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       * out.
1035ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann       */
1036ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      for (i = 0; i < ir->num_state_slots; i++) {
10374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         if (slots[i].swizzle != SWIZZLE_XYZW) {
10384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann            break;
1039ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         }
10404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      }
1041ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1042ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      variable_storage *storage;
1043ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      st_dst_reg dst;
1044ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (i == ir->num_state_slots) {
1045ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         /* We'll set the index later. */
10464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
10474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         this->variables.push_tail(storage);
10484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1049ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         dst = undef_dst;
1050ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      } else {
1051ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         /* The variable_storage constructor allocates slots based on the size
1052ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          * of the type.  However, this had better match the number of state
10534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann          * elements that we're going to copy into the new temporary.
1054ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          */
1055e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov         assert((int) ir->num_state_slots == type_size(ir->type));
1056ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1057ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
1058ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        					 this->next_temp);
1059ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         this->variables.push_tail(storage);
10604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         this->next_temp += type_size(ir->type);
1061ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1062ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
1063ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
1064e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov      }
1065ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1066ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1067ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1068ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         int index = _mesa_add_state_reference(this->prog->Parameters,
1069ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        				       (gl_state_index *)slots[i].tokens);
1070ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1071ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         if (storage->file == PROGRAM_STATE_VAR) {
1072e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (storage->index == -1) {
1073ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               storage->index = index;
1074ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            } else {
1075ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               assert(index == storage->index + (int)i);
1076ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            }
10774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann         } else {
1078ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            st_src_reg src(PROGRAM_STATE_VAR, index,
1079ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
1080e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            src.swizzle = slots[i].swizzle;
1081e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            emit(ir, TGSI_OPCODE_MOV, dst, src);
1082ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            /* even a float takes up a whole vec4 reg in a struct/array. */
1083ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann            dst.index++;
1084ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         }
1085ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
1086e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
1087ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if (storage->file == PROGRAM_TEMPORARY &&
1088ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann          dst.index != storage->index + (int) ir->num_state_slots) {
1089ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         fail_link(this->shader_program,
1090ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
1091ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        	   ir->name, dst.index - storage->index,
1092ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        	   type_size(ir->type));
1093ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
1094ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
1095ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
1096e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
1097e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid
10984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_loop *ir)
10994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
11004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   ir_dereference_variable *counter = NULL;
11014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
11024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (ir->counter != NULL)
11034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      counter = new(ir) ir_dereference_variable(ir->counter);
11044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
11054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (ir->from != NULL) {
11064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      assert(ir->counter != NULL);
11074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
11084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
11094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
11104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      a->accept(this);
11114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      delete a;
11124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
11134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
11144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   emit(NULL, TGSI_OPCODE_BGNLOOP);
11154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
11164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (ir->to) {
1117ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      ir_expression *e =
1118ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1119ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		       counter, ir->to);
1120ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      ir_if *if_stmt =  new(ir) ir_if(e);
1121ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1122ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1123ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1124ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if_stmt->then_instructions.push_tail(brk);
1125ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1126ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      if_stmt->accept(this);
1127ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
11284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann      delete if_stmt;
1129ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      delete e;
1130ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      delete brk;
1131ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
1132ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1133ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   visit_exec_list(&ir->body_instructions, this);
1134ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1135ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (ir->increment) {
1136ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      ir_expression *e =
1137ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         new(ir) ir_expression(ir_binop_add, counter->type,
1138ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann        		       counter, ir->increment);
1139ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1140ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1141ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1142ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      a->accept(this);
1143ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      delete a;
1144ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      delete e;
1145ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
1146ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1147ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   emit(NULL, TGSI_OPCODE_ENDLOOP);
1148ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
1149ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1150ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
1151ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
11524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
1153ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   switch (ir->mode) {
1154ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case ir_loop_jump::jump_break:
1155ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      emit(NULL, TGSI_OPCODE_BRK);
1156ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      break;
1157ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   case ir_loop_jump::jump_continue:
1158ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      emit(NULL, TGSI_OPCODE_CONT);
1159ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      break;
11604d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   }
11614d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
11624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1163ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1164ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
1165ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1166ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
1167ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   assert(0);
1168ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   (void)ir;
1169ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
1170ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1171ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid
1172ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::visit(ir_function *ir)
11734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann{
1174e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   /* Ignore function bodies other than main() -- we shouldn't see calls to
1175ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * them since they should all be inlined before we get to glsl_to_tgsi.
1176ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    */
1177ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (strcmp(ir->name, "main") == 0) {
1178ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      const ir_function_signature *sig;
1179ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      exec_list empty;
1180ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1181ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      sig = ir->matching_signature(&empty);
1182ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1183ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      assert(sig);
1184ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1185ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      foreach_iter(exec_list_iterator, iter, sig->body) {
1186ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         ir_instruction *ir = (ir_instruction *)iter.get();
1187ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1188ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann         ir->accept(this);
1189ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      }
1190ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   }
11914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann}
1192ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1193ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannbool
1194ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1195ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
1196ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   int nonmul_operand = 1 - mul_operand;
11974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg a, b, c;
1198ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   st_dst_reg result_dst;
1199ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1200ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   ir_expression *expr = ir->operands[mul_operand]->as_expression();
1201ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (!expr || expr->operation != ir_binop_mul)
1202ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return false;
1203ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1204ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   expr->operands[0]->accept(this);
1205ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   a = this->result;
1206ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   expr->operands[1]->accept(this);
1207ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   b = this->result;
1208ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   ir->operands[nonmul_operand]->accept(this);
1209ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   c = this->result;
1210ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1211ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   this->result = get_temp(ir->type);
1212ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   result_dst = st_dst_reg(this->result);
1213ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1214ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1215ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1216ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return true;
1217ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
1218ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1219ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann/**
1220ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1221ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *
1222ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
1223ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * implemented using multiplication, and logical-or is implemented using
1224ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
1225ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * As result, the logical expression (a & !b) can be rewritten as:
1226ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *
12274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann *     - a * !b
1228ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *     - a * (1 - b)
1229ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *     - (a * 1) - (a * b)
1230ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *     - a + -(a * b)
1231ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *     - a + (a * -b)
1232ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *
1233ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * This final expression can be implemented as a single MAD(a, -b, a)
1234ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann * instruction.
1235ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann */
1236ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannbool
1237ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1238ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
1239ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   const int other_operand = 1 - try_operand;
12404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   st_src_reg a, b;
1241ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1242ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   ir_expression *expr = ir->operands[try_operand]->as_expression();
1243ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (!expr || expr->operation != ir_unop_logic_not)
1244ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return false;
1245ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1246e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   ir->operands[other_operand]->accept(this);
1247ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   a = this->result;
1248ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   expr->operands[0]->accept(this);
1249ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   b = this->result;
1250ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1251ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   b.negate = ~b.negate;
1252ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
12534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   this->result = get_temp(ir->type);
1254ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1255ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1256ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   return true;
1257ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
1258ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1259ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannbool
1260ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannglsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1261ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann{
12624d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   /* Saturates were only introduced to vertex programs in
1263ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    * NV_vertex_program3, so don't give them to drivers in the VP.
1264ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    */
12654d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1266ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return false;
1267ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1268ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1269ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann   if (!sat_src)
1270ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      return false;
1271ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
1272e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov   sat_src->accept(this);
1273   st_src_reg src = this->result;
1274
1275   /* If we generated an expression instruction into a temporary in
1276    * processing the saturate's operand, apply the saturate to that
1277    * instruction.  Otherwise, generate a MOV to do the saturate.
1278    *
1279    * Note that we have to be careful to only do this optimization if
1280    * the instruction in question was what generated src->result.  For
1281    * example, ir_dereference_array might generate a MUL instruction
1282    * to create the reladdr, and return us a src reg using that
1283    * reladdr.  That MUL result is not the value we're trying to
1284    * saturate.
1285    */
1286   ir_expression *sat_src_expr = sat_src->as_expression();
1287   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
1288			sat_src_expr->operation == ir_binop_add ||
1289			sat_src_expr->operation == ir_binop_dot)) {
1290      glsl_to_tgsi_instruction *new_inst;
1291      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1292      new_inst->saturate = true;
1293   } else {
1294      this->result = get_temp(ir->type);
1295      st_dst_reg result_dst = st_dst_reg(this->result);
1296      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1297      glsl_to_tgsi_instruction *inst;
1298      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1299      inst->saturate = true;
1300   }
1301
1302   return true;
1303}
1304
1305void
1306glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1307        			    st_src_reg *reg, int *num_reladdr)
1308{
1309   if (!reg->reladdr)
1310      return;
1311
1312   emit_arl(ir, address_reg, *reg->reladdr);
1313
1314   if (*num_reladdr != 1) {
1315      st_src_reg temp = get_temp(glsl_type::vec4_type);
1316
1317      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1318      *reg = temp;
1319   }
1320
1321   (*num_reladdr)--;
1322}
1323
1324void
1325glsl_to_tgsi_visitor::visit(ir_expression *ir)
1326{
1327   unsigned int operand;
1328   st_src_reg op[Elements(ir->operands)];
1329   st_src_reg result_src;
1330   st_dst_reg result_dst;
1331
1332   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1333    */
1334   if (ir->operation == ir_binop_add) {
1335      if (try_emit_mad(ir, 1))
1336         return;
1337      if (try_emit_mad(ir, 0))
1338         return;
1339   }
1340
1341   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1342    */
1343   if (ir->operation == ir_binop_logic_and) {
1344      if (try_emit_mad_for_and_not(ir, 1))
1345	 return;
1346      if (try_emit_mad_for_and_not(ir, 0))
1347	 return;
1348   }
1349
1350   if (try_emit_sat(ir))
1351      return;
1352
1353   if (ir->operation == ir_quadop_vector)
1354      assert(!"ir_quadop_vector should have been lowered");
1355
1356   for (operand = 0; operand < ir->get_num_operands(); operand++) {
1357      this->result.file = PROGRAM_UNDEFINED;
1358      ir->operands[operand]->accept(this);
1359      if (this->result.file == PROGRAM_UNDEFINED) {
1360         ir_print_visitor v;
1361         printf("Failed to get tree for expression operand:\n");
1362         ir->operands[operand]->accept(&v);
1363         exit(1);
1364      }
1365      op[operand] = this->result;
1366
1367      /* Matrix expression operands should have been broken down to vector
1368       * operations already.
1369       */
1370      assert(!ir->operands[operand]->type->is_matrix());
1371   }
1372
1373   int vector_elements = ir->operands[0]->type->vector_elements;
1374   if (ir->operands[1]) {
1375      vector_elements = MAX2(vector_elements,
1376        		     ir->operands[1]->type->vector_elements);
1377   }
1378
1379   this->result.file = PROGRAM_UNDEFINED;
1380
1381   /* Storage for our result.  Ideally for an assignment we'd be using
1382    * the actual storage for the result here, instead.
1383    */
1384   result_src = get_temp(ir->type);
1385   /* convenience for the emit functions below. */
1386   result_dst = st_dst_reg(result_src);
1387   /* Limit writes to the channels that will be used by result_src later.
1388    * This does limit this temp's use as a temporary for multi-instruction
1389    * sequences.
1390    */
1391   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1392
1393   switch (ir->operation) {
1394   case ir_unop_logic_not:
1395      if (result_dst.type != GLSL_TYPE_FLOAT)
1396         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1397      else {
1398         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1399          * older GPUs implement SEQ using multiple instructions (i915 uses two
1400          * SGE instructions and a MUL instruction).  Since our logic values are
1401          * 0.0 and 1.0, 1-x also implements !x.
1402          */
1403         op[0].negate = ~op[0].negate;
1404         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1405      }
1406      break;
1407   case ir_unop_neg:
1408      if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1409         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1410      else {
1411         op[0].negate = ~op[0].negate;
1412         result_src = op[0];
1413      }
1414      break;
1415   case ir_unop_abs:
1416      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1417      break;
1418   case ir_unop_sign:
1419      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1420      break;
1421   case ir_unop_rcp:
1422      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1423      break;
1424
1425   case ir_unop_exp2:
1426      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1427      break;
1428   case ir_unop_exp:
1429   case ir_unop_log:
1430      assert(!"not reached: should be handled by ir_explog_to_explog2");
1431      break;
1432   case ir_unop_log2:
1433      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1434      break;
1435   case ir_unop_sin:
1436      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1437      break;
1438   case ir_unop_cos:
1439      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1440      break;
1441   case ir_unop_sin_reduced:
1442      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1443      break;
1444   case ir_unop_cos_reduced:
1445      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1446      break;
1447
1448   case ir_unop_dFdx:
1449      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1450      break;
1451   case ir_unop_dFdy:
1452      op[0].negate = ~op[0].negate;
1453      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1454      break;
1455
1456   case ir_unop_noise: {
1457      /* At some point, a motivated person could add a better
1458       * implementation of noise.  Currently not even the nvidia
1459       * binary drivers do anything more than this.  In any case, the
1460       * place to do this is in the GL state tracker, not the poor
1461       * driver.
1462       */
1463      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1464      break;
1465   }
1466
1467   case ir_binop_add:
1468      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1469      break;
1470   case ir_binop_sub:
1471      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1472      break;
1473
1474   case ir_binop_mul:
1475      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1476      break;
1477   case ir_binop_div:
1478      if (result_dst.type == GLSL_TYPE_FLOAT)
1479         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1480      else
1481         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1482      break;
1483   case ir_binop_mod:
1484      if (result_dst.type == GLSL_TYPE_FLOAT)
1485         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1486      else
1487         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1488      break;
1489
1490   case ir_binop_less:
1491      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1492      break;
1493   case ir_binop_greater:
1494      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1495      break;
1496   case ir_binop_lequal:
1497      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1498      break;
1499   case ir_binop_gequal:
1500      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1501      break;
1502   case ir_binop_equal:
1503      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1504      break;
1505   case ir_binop_nequal:
1506      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1507      break;
1508   case ir_binop_all_equal:
1509      /* "==" operator producing a scalar boolean. */
1510      if (ir->operands[0]->type->is_vector() ||
1511          ir->operands[1]->type->is_vector()) {
1512         st_src_reg temp = get_temp(native_integers ?
1513               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1514               glsl_type::vec4_type);
1515
1516         if (native_integers) {
1517            st_dst_reg temp_dst = st_dst_reg(temp);
1518            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1519
1520            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1521
1522            /* Emit 1-3 AND operations to combine the SEQ results. */
1523            switch (ir->operands[0]->type->vector_elements) {
1524            case 2:
1525               break;
1526            case 3:
1527               temp_dst.writemask = WRITEMASK_Y;
1528               temp1.swizzle = SWIZZLE_YYYY;
1529               temp2.swizzle = SWIZZLE_ZZZZ;
1530               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1531               break;
1532            case 4:
1533               temp_dst.writemask = WRITEMASK_X;
1534               temp1.swizzle = SWIZZLE_XXXX;
1535               temp2.swizzle = SWIZZLE_YYYY;
1536               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1537               temp_dst.writemask = WRITEMASK_Y;
1538               temp1.swizzle = SWIZZLE_ZZZZ;
1539               temp2.swizzle = SWIZZLE_WWWW;
1540               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1541            }
1542
1543            temp1.swizzle = SWIZZLE_XXXX;
1544            temp2.swizzle = SWIZZLE_YYYY;
1545            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1546         } else {
1547            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1548
1549            /* After the dot-product, the value will be an integer on the
1550             * range [0,4].  Zero becomes 1.0, and positive values become zero.
1551             */
1552            emit_dp(ir, result_dst, temp, temp, vector_elements);
1553
1554            /* Negating the result of the dot-product gives values on the range
1555             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
1556             * This is achieved using SGE.
1557             */
1558            st_src_reg sge_src = result_src;
1559            sge_src.negate = ~sge_src.negate;
1560            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1561         }
1562      } else {
1563         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1564      }
1565      break;
1566   case ir_binop_any_nequal:
1567      /* "!=" operator producing a scalar boolean. */
1568      if (ir->operands[0]->type->is_vector() ||
1569          ir->operands[1]->type->is_vector()) {
1570         st_src_reg temp = get_temp(native_integers ?
1571               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1572               glsl_type::vec4_type);
1573         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1574
1575         if (native_integers) {
1576            st_dst_reg temp_dst = st_dst_reg(temp);
1577            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1578
1579            /* Emit 1-3 OR operations to combine the SNE results. */
1580            switch (ir->operands[0]->type->vector_elements) {
1581            case 2:
1582               break;
1583            case 3:
1584               temp_dst.writemask = WRITEMASK_Y;
1585               temp1.swizzle = SWIZZLE_YYYY;
1586               temp2.swizzle = SWIZZLE_ZZZZ;
1587               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1588               break;
1589            case 4:
1590               temp_dst.writemask = WRITEMASK_X;
1591               temp1.swizzle = SWIZZLE_XXXX;
1592               temp2.swizzle = SWIZZLE_YYYY;
1593               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1594               temp_dst.writemask = WRITEMASK_Y;
1595               temp1.swizzle = SWIZZLE_ZZZZ;
1596               temp2.swizzle = SWIZZLE_WWWW;
1597               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1598            }
1599
1600            temp1.swizzle = SWIZZLE_XXXX;
1601            temp2.swizzle = SWIZZLE_YYYY;
1602            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1603         } else {
1604            /* After the dot-product, the value will be an integer on the
1605             * range [0,4].  Zero stays zero, and positive values become 1.0.
1606             */
1607            glsl_to_tgsi_instruction *const dp =
1608                  emit_dp(ir, result_dst, temp, temp, vector_elements);
1609            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1610               /* The clamping to [0,1] can be done for free in the fragment
1611                * shader with a saturate.
1612                */
1613               dp->saturate = true;
1614            } else {
1615               /* Negating the result of the dot-product gives values on the range
1616                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1617                * achieved using SLT.
1618                */
1619               st_src_reg slt_src = result_src;
1620               slt_src.negate = ~slt_src.negate;
1621               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1622            }
1623         }
1624      } else {
1625         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1626      }
1627      break;
1628
1629   case ir_unop_any: {
1630      assert(ir->operands[0]->type->is_vector());
1631
1632      /* After the dot-product, the value will be an integer on the
1633       * range [0,4].  Zero stays zero, and positive values become 1.0.
1634       */
1635      glsl_to_tgsi_instruction *const dp =
1636         emit_dp(ir, result_dst, op[0], op[0],
1637                 ir->operands[0]->type->vector_elements);
1638      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1639          result_dst.type == GLSL_TYPE_FLOAT) {
1640	      /* The clamping to [0,1] can be done for free in the fragment
1641	       * shader with a saturate.
1642	       */
1643	      dp->saturate = true;
1644      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1645	      /* Negating the result of the dot-product gives values on the range
1646	       * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1647	       * is achieved using SLT.
1648	       */
1649	      st_src_reg slt_src = result_src;
1650	      slt_src.negate = ~slt_src.negate;
1651	      emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1652      }
1653      else {
1654         /* Use SNE 0 if integers are being used as boolean values. */
1655         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1656      }
1657      break;
1658   }
1659
1660   case ir_binop_logic_xor:
1661      if (native_integers)
1662         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1663      else
1664         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1665      break;
1666
1667   case ir_binop_logic_or: {
1668      if (native_integers) {
1669         /* If integers are used as booleans, we can use an actual "or"
1670          * instruction.
1671          */
1672         assert(native_integers);
1673         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1674      } else {
1675         /* After the addition, the value will be an integer on the
1676          * range [0,2].  Zero stays zero, and positive values become 1.0.
1677          */
1678         glsl_to_tgsi_instruction *add =
1679            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1680         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1681            /* The clamping to [0,1] can be done for free in the fragment
1682             * shader with a saturate if floats are being used as boolean values.
1683             */
1684            add->saturate = true;
1685         } else {
1686            /* Negating the result of the addition gives values on the range
1687             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1688             * is achieved using SLT.
1689             */
1690            st_src_reg slt_src = result_src;
1691            slt_src.negate = ~slt_src.negate;
1692            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1693         }
1694      }
1695      break;
1696   }
1697
1698   case ir_binop_logic_and:
1699      /* If native integers are disabled, the bool args are stored as float 0.0
1700       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
1701       * actual AND opcode.
1702       */
1703      if (native_integers)
1704         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1705      else
1706         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1707      break;
1708
1709   case ir_binop_dot:
1710      assert(ir->operands[0]->type->is_vector());
1711      assert(ir->operands[0]->type == ir->operands[1]->type);
1712      emit_dp(ir, result_dst, op[0], op[1],
1713              ir->operands[0]->type->vector_elements);
1714      break;
1715
1716   case ir_unop_sqrt:
1717      /* sqrt(x) = x * rsq(x). */
1718      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1719      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1720      /* For incoming channels <= 0, set the result to 0. */
1721      op[0].negate = ~op[0].negate;
1722      emit(ir, TGSI_OPCODE_CMP, result_dst,
1723        		  op[0], result_src, st_src_reg_for_float(0.0));
1724      break;
1725   case ir_unop_rsq:
1726      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1727      break;
1728   case ir_unop_i2f:
1729      if (native_integers) {
1730         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1731         break;
1732      }
1733      /* fallthrough to next case otherwise */
1734   case ir_unop_b2f:
1735      if (native_integers) {
1736         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1737         break;
1738      }
1739      /* fallthrough to next case otherwise */
1740   case ir_unop_i2u:
1741   case ir_unop_u2i:
1742      /* Converting between signed and unsigned integers is a no-op. */
1743      result_src = op[0];
1744      break;
1745   case ir_unop_b2i:
1746      if (native_integers) {
1747         /* Booleans are stored as integers using ~0 for true and 0 for false.
1748          * GLSL requires that int(bool) return 1 for true and 0 for false.
1749          * This conversion is done with AND, but it could be done with NEG.
1750          */
1751         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1752      } else {
1753         /* Booleans and integers are both stored as floats when native
1754          * integers are disabled.
1755          */
1756         result_src = op[0];
1757      }
1758      break;
1759   case ir_unop_f2i:
1760      if (native_integers)
1761         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1762      else
1763         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1764      break;
1765   case ir_unop_bitcast_f2i:
1766   case ir_unop_bitcast_f2u:
1767   case ir_unop_bitcast_i2f:
1768   case ir_unop_bitcast_u2f:
1769      result_src = op[0];
1770      break;
1771   case ir_unop_f2b:
1772      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1773      break;
1774   case ir_unop_i2b:
1775      if (native_integers)
1776         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1777      else
1778         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1779      break;
1780   case ir_unop_trunc:
1781      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1782      break;
1783   case ir_unop_ceil:
1784      emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
1785      break;
1786   case ir_unop_floor:
1787      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1788      break;
1789   case ir_unop_round_even:
1790      emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
1791      break;
1792   case ir_unop_fract:
1793      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1794      break;
1795
1796   case ir_binop_min:
1797      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1798      break;
1799   case ir_binop_max:
1800      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1801      break;
1802   case ir_binop_pow:
1803      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1804      break;
1805
1806   case ir_unop_bit_not:
1807      if (native_integers) {
1808         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1809         break;
1810      }
1811   case ir_unop_u2f:
1812      if (native_integers) {
1813         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1814         break;
1815      }
1816   case ir_binop_lshift:
1817      if (native_integers) {
1818         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
1819         break;
1820      }
1821   case ir_binop_rshift:
1822      if (native_integers) {
1823         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
1824         break;
1825      }
1826   case ir_binop_bit_and:
1827      if (native_integers) {
1828         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1829         break;
1830      }
1831   case ir_binop_bit_xor:
1832      if (native_integers) {
1833         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1834         break;
1835      }
1836   case ir_binop_bit_or:
1837      if (native_integers) {
1838         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1839         break;
1840      }
1841
1842      assert(!"GLSL 1.30 features unsupported");
1843      break;
1844
1845   case ir_quadop_vector:
1846      /* This operation should have already been handled.
1847       */
1848      assert(!"Should not get here.");
1849      break;
1850   }
1851
1852   this->result = result_src;
1853}
1854
1855
1856void
1857glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1858{
1859   st_src_reg src;
1860   int i;
1861   int swizzle[4];
1862
1863   /* Note that this is only swizzles in expressions, not those on the left
1864    * hand side of an assignment, which do write masking.  See ir_assignment
1865    * for that.
1866    */
1867
1868   ir->val->accept(this);
1869   src = this->result;
1870   assert(src.file != PROGRAM_UNDEFINED);
1871
1872   for (i = 0; i < 4; i++) {
1873      if (i < ir->type->vector_elements) {
1874         switch (i) {
1875         case 0:
1876            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1877            break;
1878         case 1:
1879            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1880            break;
1881         case 2:
1882            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1883            break;
1884         case 3:
1885            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1886            break;
1887         }
1888      } else {
1889         /* If the type is smaller than a vec4, replicate the last
1890          * channel out.
1891          */
1892         swizzle[i] = swizzle[ir->type->vector_elements - 1];
1893      }
1894   }
1895
1896   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1897
1898   this->result = src;
1899}
1900
1901void
1902glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1903{
1904   variable_storage *entry = find_variable_storage(ir->var);
1905   ir_variable *var = ir->var;
1906
1907   if (!entry) {
1908      switch (var->mode) {
1909      case ir_var_uniform:
1910         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1911        				       var->location);
1912         this->variables.push_tail(entry);
1913         break;
1914      case ir_var_in:
1915      case ir_var_inout:
1916         /* The linker assigns locations for varyings and attributes,
1917          * including deprecated builtins (like gl_Color), user-assign
1918          * generic attributes (glBindVertexLocation), and
1919          * user-defined varyings.
1920          *
1921          * FINISHME: We would hit this path for function arguments.  Fix!
1922          */
1923         assert(var->location != -1);
1924         entry = new(mem_ctx) variable_storage(var,
1925                                               PROGRAM_INPUT,
1926                                               var->location);
1927         break;
1928      case ir_var_out:
1929         assert(var->location != -1);
1930         entry = new(mem_ctx) variable_storage(var,
1931                                               PROGRAM_OUTPUT,
1932                                               var->location + var->index);
1933         break;
1934      case ir_var_system_value:
1935         entry = new(mem_ctx) variable_storage(var,
1936                                               PROGRAM_SYSTEM_VALUE,
1937                                               var->location);
1938         break;
1939      case ir_var_auto:
1940      case ir_var_temporary:
1941         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1942        				       this->next_temp);
1943         this->variables.push_tail(entry);
1944
1945         next_temp += type_size(var->type);
1946         break;
1947      }
1948
1949      if (!entry) {
1950         printf("Failed to make storage for %s\n", var->name);
1951         exit(1);
1952      }
1953   }
1954
1955   this->result = st_src_reg(entry->file, entry->index, var->type);
1956   if (!native_integers)
1957      this->result.type = GLSL_TYPE_FLOAT;
1958}
1959
1960void
1961glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1962{
1963   ir_constant *index;
1964   st_src_reg src;
1965   int element_size = type_size(ir->type);
1966
1967   index = ir->array_index->constant_expression_value();
1968
1969   ir->array->accept(this);
1970   src = this->result;
1971
1972   if (index) {
1973      src.index += index->value.i[0] * element_size;
1974   } else {
1975      /* Variable index array dereference.  It eats the "vec4" of the
1976       * base of the array and an index that offsets the TGSI register
1977       * index.
1978       */
1979      ir->array_index->accept(this);
1980
1981      st_src_reg index_reg;
1982
1983      if (element_size == 1) {
1984         index_reg = this->result;
1985      } else {
1986         index_reg = get_temp(native_integers ?
1987                              glsl_type::int_type : glsl_type::float_type);
1988
1989         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1990              this->result, st_src_reg_for_type(index_reg.type, element_size));
1991      }
1992
1993      /* If there was already a relative address register involved, add the
1994       * new and the old together to get the new offset.
1995       */
1996      if (src.reladdr != NULL) {
1997         st_src_reg accum_reg = get_temp(native_integers ?
1998                                glsl_type::int_type : glsl_type::float_type);
1999
2000         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2001              index_reg, *src.reladdr);
2002
2003         index_reg = accum_reg;
2004      }
2005
2006      src.reladdr = ralloc(mem_ctx, st_src_reg);
2007      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2008   }
2009
2010   /* If the type is smaller than a vec4, replicate the last channel out. */
2011   if (ir->type->is_scalar() || ir->type->is_vector())
2012      src.swizzle = swizzle_for_size(ir->type->vector_elements);
2013   else
2014      src.swizzle = SWIZZLE_NOOP;
2015
2016   this->result = src;
2017}
2018
2019void
2020glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2021{
2022   unsigned int i;
2023   const glsl_type *struct_type = ir->record->type;
2024   int offset = 0;
2025
2026   ir->record->accept(this);
2027
2028   for (i = 0; i < struct_type->length; i++) {
2029      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2030         break;
2031      offset += type_size(struct_type->fields.structure[i].type);
2032   }
2033
2034   /* If the type is smaller than a vec4, replicate the last channel out. */
2035   if (ir->type->is_scalar() || ir->type->is_vector())
2036      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2037   else
2038      this->result.swizzle = SWIZZLE_NOOP;
2039
2040   this->result.index += offset;
2041}
2042
2043/**
2044 * We want to be careful in assignment setup to hit the actual storage
2045 * instead of potentially using a temporary like we might with the
2046 * ir_dereference handler.
2047 */
2048static st_dst_reg
2049get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2050{
2051   /* The LHS must be a dereference.  If the LHS is a variable indexed array
2052    * access of a vector, it must be separated into a series conditional moves
2053    * before reaching this point (see ir_vec_index_to_cond_assign).
2054    */
2055   assert(ir->as_dereference());
2056   ir_dereference_array *deref_array = ir->as_dereference_array();
2057   if (deref_array) {
2058      assert(!deref_array->array->type->is_vector());
2059   }
2060
2061   /* Use the rvalue deref handler for the most part.  We'll ignore
2062    * swizzles in it and write swizzles using writemask, though.
2063    */
2064   ir->accept(v);
2065   return st_dst_reg(v->result);
2066}
2067
2068/**
2069 * Process the condition of a conditional assignment
2070 *
2071 * Examines the condition of a conditional assignment to generate the optimal
2072 * first operand of a \c CMP instruction.  If the condition is a relational
2073 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2074 * used as the source for the \c CMP instruction.  Otherwise the comparison
2075 * is processed to a boolean result, and the boolean result is used as the
2076 * operand to the CMP instruction.
2077 */
2078bool
2079glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2080{
2081   ir_rvalue *src_ir = ir;
2082   bool negate = true;
2083   bool switch_order = false;
2084
2085   ir_expression *const expr = ir->as_expression();
2086   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2087      bool zero_on_left = false;
2088
2089      if (expr->operands[0]->is_zero()) {
2090         src_ir = expr->operands[1];
2091         zero_on_left = true;
2092      } else if (expr->operands[1]->is_zero()) {
2093         src_ir = expr->operands[0];
2094         zero_on_left = false;
2095      }
2096
2097      /*      a is -  0  +            -  0  +
2098       * (a <  0)  T  F  F  ( a < 0)  T  F  F
2099       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
2100       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2101       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2102       * (a >  0)  F  F  T  (-a < 0)  F  F  T
2103       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
2104       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2105       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2106       *
2107       * Note that exchanging the order of 0 and 'a' in the comparison simply
2108       * means that the value of 'a' should be negated.
2109       */
2110      if (src_ir != ir) {
2111         switch (expr->operation) {
2112         case ir_binop_less:
2113            switch_order = false;
2114            negate = zero_on_left;
2115            break;
2116
2117         case ir_binop_greater:
2118            switch_order = false;
2119            negate = !zero_on_left;
2120            break;
2121
2122         case ir_binop_lequal:
2123            switch_order = true;
2124            negate = !zero_on_left;
2125            break;
2126
2127         case ir_binop_gequal:
2128            switch_order = true;
2129            negate = zero_on_left;
2130            break;
2131
2132         default:
2133            /* This isn't the right kind of comparison afterall, so make sure
2134             * the whole condition is visited.
2135             */
2136            src_ir = ir;
2137            break;
2138         }
2139      }
2140   }
2141
2142   src_ir->accept(this);
2143
2144   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2145    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
2146    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2147    * computing the condition.
2148    */
2149   if (negate)
2150      this->result.negate = ~this->result.negate;
2151
2152   return switch_order;
2153}
2154
2155void
2156glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2157{
2158   st_dst_reg l;
2159   st_src_reg r;
2160   int i;
2161
2162   ir->rhs->accept(this);
2163   r = this->result;
2164
2165   l = get_assignment_lhs(ir->lhs, this);
2166
2167   /* FINISHME: This should really set to the correct maximal writemask for each
2168    * FINISHME: component written (in the loops below).  This case can only
2169    * FINISHME: occur for matrices, arrays, and structures.
2170    */
2171   if (ir->write_mask == 0) {
2172      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2173      l.writemask = WRITEMASK_XYZW;
2174   } else if (ir->lhs->type->is_scalar() &&
2175              ir->lhs->variable_referenced()->mode == ir_var_out) {
2176      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2177       * FINISHME: W component of fragment shader output zero, work correctly.
2178       */
2179      l.writemask = WRITEMASK_XYZW;
2180   } else {
2181      int swizzles[4];
2182      int first_enabled_chan = 0;
2183      int rhs_chan = 0;
2184
2185      l.writemask = ir->write_mask;
2186
2187      for (int i = 0; i < 4; i++) {
2188         if (l.writemask & (1 << i)) {
2189            first_enabled_chan = GET_SWZ(r.swizzle, i);
2190            break;
2191         }
2192      }
2193
2194      /* Swizzle a small RHS vector into the channels being written.
2195       *
2196       * glsl ir treats write_mask as dictating how many channels are
2197       * present on the RHS while TGSI treats write_mask as just
2198       * showing which channels of the vec4 RHS get written.
2199       */
2200      for (int i = 0; i < 4; i++) {
2201         if (l.writemask & (1 << i))
2202            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2203         else
2204            swizzles[i] = first_enabled_chan;
2205      }
2206      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2207        			swizzles[2], swizzles[3]);
2208   }
2209
2210   assert(l.file != PROGRAM_UNDEFINED);
2211   assert(r.file != PROGRAM_UNDEFINED);
2212
2213   if (ir->condition) {
2214      const bool switch_order = this->process_move_condition(ir->condition);
2215      st_src_reg condition = this->result;
2216
2217      for (i = 0; i < type_size(ir->lhs->type); i++) {
2218         st_src_reg l_src = st_src_reg(l);
2219         st_src_reg condition_temp = condition;
2220         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
2221
2222         if (native_integers) {
2223            /* This is necessary because TGSI's CMP instruction expects the
2224             * condition to be a float, and we store booleans as integers.
2225             * If TGSI had a UCMP instruction or similar, this extra
2226             * instruction would not be necessary.
2227             */
2228            condition_temp = get_temp(glsl_type::vec4_type);
2229            condition.negate = 0;
2230            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
2231            condition_temp.swizzle = condition.swizzle;
2232         }
2233
2234         if (switch_order) {
2235            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
2236         } else {
2237            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
2238         }
2239
2240         l.index++;
2241         r.index++;
2242      }
2243   } else if (ir->rhs->as_expression() &&
2244              this->instructions.get_tail() &&
2245              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2246              type_size(ir->lhs->type) == 1 &&
2247              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
2248      /* To avoid emitting an extra MOV when assigning an expression to a
2249       * variable, emit the last instruction of the expression again, but
2250       * replace the destination register with the target of the assignment.
2251       * Dead code elimination will remove the original instruction.
2252       */
2253      glsl_to_tgsi_instruction *inst, *new_inst;
2254      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2255      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
2256      new_inst->saturate = inst->saturate;
2257      inst->dead_mask = inst->dst.writemask;
2258   } else {
2259      for (i = 0; i < type_size(ir->lhs->type); i++) {
2260         emit(ir, TGSI_OPCODE_MOV, l, r);
2261         l.index++;
2262         r.index++;
2263      }
2264   }
2265}
2266
2267
2268void
2269glsl_to_tgsi_visitor::visit(ir_constant *ir)
2270{
2271   st_src_reg src;
2272   GLfloat stack_vals[4] = { 0 };
2273   gl_constant_value *values = (gl_constant_value *) stack_vals;
2274   GLenum gl_type = GL_NONE;
2275   unsigned int i;
2276   static int in_array = 0;
2277   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
2278
2279   /* Unfortunately, 4 floats is all we can get into
2280    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
2281    * aggregate constant and move each constant value into it.  If we
2282    * get lucky, copy propagation will eliminate the extra moves.
2283    */
2284   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2285      st_src_reg temp_base = get_temp(ir->type);
2286      st_dst_reg temp = st_dst_reg(temp_base);
2287
2288      foreach_iter(exec_list_iterator, iter, ir->components) {
2289         ir_constant *field_value = (ir_constant *)iter.get();
2290         int size = type_size(field_value->type);
2291
2292         assert(size > 0);
2293
2294         field_value->accept(this);
2295         src = this->result;
2296
2297         for (i = 0; i < (unsigned int)size; i++) {
2298            emit(ir, TGSI_OPCODE_MOV, temp, src);
2299
2300            src.index++;
2301            temp.index++;
2302         }
2303      }
2304      this->result = temp_base;
2305      return;
2306   }
2307
2308   if (ir->type->is_array()) {
2309      st_src_reg temp_base = get_temp(ir->type);
2310      st_dst_reg temp = st_dst_reg(temp_base);
2311      int size = type_size(ir->type->fields.array);
2312
2313      assert(size > 0);
2314      in_array++;
2315
2316      for (i = 0; i < ir->type->length; i++) {
2317         ir->array_elements[i]->accept(this);
2318         src = this->result;
2319         for (int j = 0; j < size; j++) {
2320            emit(ir, TGSI_OPCODE_MOV, temp, src);
2321
2322            src.index++;
2323            temp.index++;
2324         }
2325      }
2326      this->result = temp_base;
2327      in_array--;
2328      return;
2329   }
2330
2331   if (ir->type->is_matrix()) {
2332      st_src_reg mat = get_temp(ir->type);
2333      st_dst_reg mat_column = st_dst_reg(mat);
2334
2335      for (i = 0; i < ir->type->matrix_columns; i++) {
2336         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2337         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2338
2339         src = st_src_reg(file, -1, ir->type->base_type);
2340         src.index = add_constant(file,
2341                                  values,
2342                                  ir->type->vector_elements,
2343                                  GL_FLOAT,
2344                                  &src.swizzle);
2345         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2346
2347         mat_column.index++;
2348      }
2349
2350      this->result = mat;
2351      return;
2352   }
2353
2354   switch (ir->type->base_type) {
2355   case GLSL_TYPE_FLOAT:
2356      gl_type = GL_FLOAT;
2357      for (i = 0; i < ir->type->vector_elements; i++) {
2358         values[i].f = ir->value.f[i];
2359      }
2360      break;
2361   case GLSL_TYPE_UINT:
2362      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
2363      for (i = 0; i < ir->type->vector_elements; i++) {
2364         if (native_integers)
2365            values[i].u = ir->value.u[i];
2366         else
2367            values[i].f = ir->value.u[i];
2368      }
2369      break;
2370   case GLSL_TYPE_INT:
2371      gl_type = native_integers ? GL_INT : GL_FLOAT;
2372      for (i = 0; i < ir->type->vector_elements; i++) {
2373         if (native_integers)
2374            values[i].i = ir->value.i[i];
2375         else
2376            values[i].f = ir->value.i[i];
2377      }
2378      break;
2379   case GLSL_TYPE_BOOL:
2380      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
2381      for (i = 0; i < ir->type->vector_elements; i++) {
2382         if (native_integers)
2383            values[i].u = ir->value.b[i] ? ~0 : 0;
2384         else
2385            values[i].f = ir->value.b[i];
2386      }
2387      break;
2388   default:
2389      assert(!"Non-float/uint/int/bool constant");
2390   }
2391
2392   this->result = st_src_reg(file, -1, ir->type);
2393   this->result.index = add_constant(file,
2394                                     values,
2395                                     ir->type->vector_elements,
2396                                     gl_type,
2397                                     &this->result.swizzle);
2398}
2399
2400function_entry *
2401glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2402{
2403   function_entry *entry;
2404
2405   foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2406      entry = (function_entry *)iter.get();
2407
2408      if (entry->sig == sig)
2409         return entry;
2410   }
2411
2412   entry = ralloc(mem_ctx, function_entry);
2413   entry->sig = sig;
2414   entry->sig_id = this->next_signature_id++;
2415   entry->bgn_inst = NULL;
2416
2417   /* Allocate storage for all the parameters. */
2418   foreach_iter(exec_list_iterator, iter, sig->parameters) {
2419      ir_variable *param = (ir_variable *)iter.get();
2420      variable_storage *storage;
2421
2422      storage = find_variable_storage(param);
2423      assert(!storage);
2424
2425      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2426        				      this->next_temp);
2427      this->variables.push_tail(storage);
2428
2429      this->next_temp += type_size(param->type);
2430   }
2431
2432   if (!sig->return_type->is_void()) {
2433      entry->return_reg = get_temp(sig->return_type);
2434   } else {
2435      entry->return_reg = undef_src;
2436   }
2437
2438   this->function_signatures.push_tail(entry);
2439   return entry;
2440}
2441
2442void
2443glsl_to_tgsi_visitor::visit(ir_call *ir)
2444{
2445   glsl_to_tgsi_instruction *call_inst;
2446   ir_function_signature *sig = ir->callee;
2447   function_entry *entry = get_function_signature(sig);
2448   int i;
2449
2450   /* Process in parameters. */
2451   exec_list_iterator sig_iter = sig->parameters.iterator();
2452   foreach_iter(exec_list_iterator, iter, *ir) {
2453      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2454      ir_variable *param = (ir_variable *)sig_iter.get();
2455
2456      if (param->mode == ir_var_in ||
2457          param->mode == ir_var_inout) {
2458         variable_storage *storage = find_variable_storage(param);
2459         assert(storage);
2460
2461         param_rval->accept(this);
2462         st_src_reg r = this->result;
2463
2464         st_dst_reg l;
2465         l.file = storage->file;
2466         l.index = storage->index;
2467         l.reladdr = NULL;
2468         l.writemask = WRITEMASK_XYZW;
2469         l.cond_mask = COND_TR;
2470
2471         for (i = 0; i < type_size(param->type); i++) {
2472            emit(ir, TGSI_OPCODE_MOV, l, r);
2473            l.index++;
2474            r.index++;
2475         }
2476      }
2477
2478      sig_iter.next();
2479   }
2480   assert(!sig_iter.has_next());
2481
2482   /* Emit call instruction */
2483   call_inst = emit(ir, TGSI_OPCODE_CAL);
2484   call_inst->function = entry;
2485
2486   /* Process out parameters. */
2487   sig_iter = sig->parameters.iterator();
2488   foreach_iter(exec_list_iterator, iter, *ir) {
2489      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2490      ir_variable *param = (ir_variable *)sig_iter.get();
2491
2492      if (param->mode == ir_var_out ||
2493          param->mode == ir_var_inout) {
2494         variable_storage *storage = find_variable_storage(param);
2495         assert(storage);
2496
2497         st_src_reg r;
2498         r.file = storage->file;
2499         r.index = storage->index;
2500         r.reladdr = NULL;
2501         r.swizzle = SWIZZLE_NOOP;
2502         r.negate = 0;
2503
2504         param_rval->accept(this);
2505         st_dst_reg l = st_dst_reg(this->result);
2506
2507         for (i = 0; i < type_size(param->type); i++) {
2508            emit(ir, TGSI_OPCODE_MOV, l, r);
2509            l.index++;
2510            r.index++;
2511         }
2512      }
2513
2514      sig_iter.next();
2515   }
2516   assert(!sig_iter.has_next());
2517
2518   /* Process return value. */
2519   this->result = entry->return_reg;
2520}
2521
2522void
2523glsl_to_tgsi_visitor::visit(ir_texture *ir)
2524{
2525   st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
2526   st_dst_reg result_dst, coord_dst;
2527   glsl_to_tgsi_instruction *inst = NULL;
2528   unsigned opcode = TGSI_OPCODE_NOP;
2529
2530   if (ir->coordinate) {
2531      ir->coordinate->accept(this);
2532
2533      /* Put our coords in a temp.  We'll need to modify them for shadow,
2534       * projection, or LOD, so the only case we'd use it as is is if
2535       * we're doing plain old texturing.  The optimization passes on
2536       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
2537       */
2538      coord = get_temp(glsl_type::vec4_type);
2539      coord_dst = st_dst_reg(coord);
2540      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2541   }
2542
2543   if (ir->projector) {
2544      ir->projector->accept(this);
2545      projector = this->result;
2546   }
2547
2548   /* Storage for our result.  Ideally for an assignment we'd be using
2549    * the actual storage for the result here, instead.
2550    */
2551   result_src = get_temp(glsl_type::vec4_type);
2552   result_dst = st_dst_reg(result_src);
2553
2554   switch (ir->op) {
2555   case ir_tex:
2556      opcode = TGSI_OPCODE_TEX;
2557      break;
2558   case ir_txb:
2559      opcode = TGSI_OPCODE_TXB;
2560      ir->lod_info.bias->accept(this);
2561      lod_info = this->result;
2562      break;
2563   case ir_txl:
2564      opcode = TGSI_OPCODE_TXL;
2565      ir->lod_info.lod->accept(this);
2566      lod_info = this->result;
2567      break;
2568   case ir_txd:
2569      opcode = TGSI_OPCODE_TXD;
2570      ir->lod_info.grad.dPdx->accept(this);
2571      dx = this->result;
2572      ir->lod_info.grad.dPdy->accept(this);
2573      dy = this->result;
2574      break;
2575   case ir_txs:
2576      opcode = TGSI_OPCODE_TXQ;
2577      ir->lod_info.lod->accept(this);
2578      lod_info = this->result;
2579      break;
2580   case ir_txf:
2581      opcode = TGSI_OPCODE_TXF;
2582      ir->lod_info.lod->accept(this);
2583      lod_info = this->result;
2584      if (ir->offset) {
2585	 ir->offset->accept(this);
2586	 offset = this->result;
2587      }
2588      break;
2589   }
2590
2591   const glsl_type *sampler_type = ir->sampler->type;
2592
2593   if (ir->projector) {
2594      if (opcode == TGSI_OPCODE_TEX) {
2595         /* Slot the projector in as the last component of the coord. */
2596         coord_dst.writemask = WRITEMASK_W;
2597         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2598         coord_dst.writemask = WRITEMASK_XYZW;
2599         opcode = TGSI_OPCODE_TXP;
2600      } else {
2601         st_src_reg coord_w = coord;
2602         coord_w.swizzle = SWIZZLE_WWWW;
2603
2604         /* For the other TEX opcodes there's no projective version
2605          * since the last slot is taken up by LOD info.  Do the
2606          * projective divide now.
2607          */
2608         coord_dst.writemask = WRITEMASK_W;
2609         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2610
2611         /* In the case where we have to project the coordinates "by hand,"
2612          * the shadow comparator value must also be projected.
2613          */
2614         st_src_reg tmp_src = coord;
2615         if (ir->shadow_comparitor) {
2616            /* Slot the shadow value in as the second to last component of the
2617             * coord.
2618             */
2619            ir->shadow_comparitor->accept(this);
2620
2621            tmp_src = get_temp(glsl_type::vec4_type);
2622            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2623
2624	    /* Projective division not allowed for array samplers. */
2625	    assert(!sampler_type->sampler_array);
2626
2627            tmp_dst.writemask = WRITEMASK_Z;
2628            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2629
2630            tmp_dst.writemask = WRITEMASK_XY;
2631            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2632         }
2633
2634         coord_dst.writemask = WRITEMASK_XYZ;
2635         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2636
2637         coord_dst.writemask = WRITEMASK_XYZW;
2638         coord.swizzle = SWIZZLE_XYZW;
2639      }
2640   }
2641
2642   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2643    * comparator was put in the correct place (and projected) by the code,
2644    * above, that handles by-hand projection.
2645    */
2646   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2647      /* Slot the shadow value in as the second to last component of the
2648       * coord.
2649       */
2650      ir->shadow_comparitor->accept(this);
2651
2652      /* XXX This will need to be updated for cubemap array samplers. */
2653      if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2654	   sampler_type->sampler_array) ||
2655	  sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
2656         coord_dst.writemask = WRITEMASK_W;
2657      } else {
2658         coord_dst.writemask = WRITEMASK_Z;
2659      }
2660
2661      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2662      coord_dst.writemask = WRITEMASK_XYZW;
2663   }
2664
2665   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
2666       opcode == TGSI_OPCODE_TXF) {
2667      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2668      coord_dst.writemask = WRITEMASK_W;
2669      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2670      coord_dst.writemask = WRITEMASK_XYZW;
2671   }
2672
2673   if (opcode == TGSI_OPCODE_TXD)
2674      inst = emit(ir, opcode, result_dst, coord, dx, dy);
2675   else if (opcode == TGSI_OPCODE_TXQ)
2676      inst = emit(ir, opcode, result_dst, lod_info);
2677   else if (opcode == TGSI_OPCODE_TXF) {
2678      inst = emit(ir, opcode, result_dst, coord);
2679   } else
2680      inst = emit(ir, opcode, result_dst, coord);
2681
2682   if (ir->shadow_comparitor)
2683      inst->tex_shadow = GL_TRUE;
2684
2685   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2686        					   this->shader_program,
2687        					   this->prog);
2688
2689   if (ir->offset) {
2690       inst->tex_offset_num_offset = 1;
2691       inst->tex_offsets[0].Index = offset.index;
2692       inst->tex_offsets[0].File = offset.file;
2693       inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
2694       inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
2695       inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
2696   }
2697
2698   switch (sampler_type->sampler_dimensionality) {
2699   case GLSL_SAMPLER_DIM_1D:
2700      inst->tex_target = (sampler_type->sampler_array)
2701         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2702      break;
2703   case GLSL_SAMPLER_DIM_2D:
2704      inst->tex_target = (sampler_type->sampler_array)
2705         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2706      break;
2707   case GLSL_SAMPLER_DIM_3D:
2708      inst->tex_target = TEXTURE_3D_INDEX;
2709      break;
2710   case GLSL_SAMPLER_DIM_CUBE:
2711      inst->tex_target = TEXTURE_CUBE_INDEX;
2712      break;
2713   case GLSL_SAMPLER_DIM_RECT:
2714      inst->tex_target = TEXTURE_RECT_INDEX;
2715      break;
2716   case GLSL_SAMPLER_DIM_BUF:
2717      assert(!"FINISHME: Implement ARB_texture_buffer_object");
2718      break;
2719   case GLSL_SAMPLER_DIM_EXTERNAL:
2720      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
2721      break;
2722   default:
2723      assert(!"Should not get here.");
2724   }
2725
2726   this->result = result_src;
2727}
2728
2729void
2730glsl_to_tgsi_visitor::visit(ir_return *ir)
2731{
2732   if (ir->get_value()) {
2733      st_dst_reg l;
2734      int i;
2735
2736      assert(current_function);
2737
2738      ir->get_value()->accept(this);
2739      st_src_reg r = this->result;
2740
2741      l = st_dst_reg(current_function->return_reg);
2742
2743      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2744         emit(ir, TGSI_OPCODE_MOV, l, r);
2745         l.index++;
2746         r.index++;
2747      }
2748   }
2749
2750   emit(ir, TGSI_OPCODE_RET);
2751}
2752
2753void
2754glsl_to_tgsi_visitor::visit(ir_discard *ir)
2755{
2756   struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2757
2758   if (ir->condition) {
2759      ir->condition->accept(this);
2760      this->result.negate = ~this->result.negate;
2761      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2762   } else {
2763      emit(ir, TGSI_OPCODE_KILP);
2764   }
2765
2766   fp->UsesKill = GL_TRUE;
2767}
2768
2769void
2770glsl_to_tgsi_visitor::visit(ir_if *ir)
2771{
2772   glsl_to_tgsi_instruction *cond_inst, *if_inst;
2773   glsl_to_tgsi_instruction *prev_inst;
2774
2775   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2776
2777   ir->condition->accept(this);
2778   assert(this->result.file != PROGRAM_UNDEFINED);
2779
2780   if (this->options->EmitCondCodes) {
2781      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2782
2783      /* See if we actually generated any instruction for generating
2784       * the condition.  If not, then cook up a move to a temp so we
2785       * have something to set cond_update on.
2786       */
2787      if (cond_inst == prev_inst) {
2788         st_src_reg temp = get_temp(glsl_type::bool_type);
2789         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2790      }
2791      cond_inst->cond_update = GL_TRUE;
2792
2793      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2794      if_inst->dst.cond_mask = COND_NE;
2795   } else {
2796      if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2797   }
2798
2799   this->instructions.push_tail(if_inst);
2800
2801   visit_exec_list(&ir->then_instructions, this);
2802
2803   if (!ir->else_instructions.is_empty()) {
2804      emit(ir->condition, TGSI_OPCODE_ELSE);
2805      visit_exec_list(&ir->else_instructions, this);
2806   }
2807
2808   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2809}
2810
2811glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2812{
2813   result.file = PROGRAM_UNDEFINED;
2814   next_temp = 1;
2815   next_signature_id = 1;
2816   num_immediates = 0;
2817   current_function = NULL;
2818   num_address_regs = 0;
2819   samplers_used = 0;
2820   indirect_addr_temps = false;
2821   indirect_addr_consts = false;
2822   num_clip_distances = 0;
2823   glsl_version = 0;
2824   native_integers = false;
2825   mem_ctx = ralloc_context(NULL);
2826   ctx = NULL;
2827   prog = NULL;
2828   shader_program = NULL;
2829   options = NULL;
2830}
2831
2832glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2833{
2834   ralloc_free(mem_ctx);
2835}
2836
2837extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2838{
2839   delete v;
2840}
2841
2842
2843/**
2844 * Count resources used by the given gpu program (number of texture
2845 * samplers, etc).
2846 */
2847static void
2848count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2849{
2850   v->samplers_used = 0;
2851
2852   foreach_iter(exec_list_iterator, iter, v->instructions) {
2853      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2854
2855      if (is_tex_instruction(inst->op)) {
2856         v->samplers_used |= 1 << inst->sampler;
2857
2858         if (inst->tex_shadow) {
2859            prog->ShadowSamplers |= 1 << inst->sampler;
2860         }
2861      }
2862   }
2863
2864   prog->SamplersUsed = v->samplers_used;
2865
2866   if (v->shader_program != NULL)
2867      _mesa_update_shader_textures_used(v->shader_program, prog);
2868}
2869
2870static void
2871set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2872        		struct gl_shader_program *shader_program,
2873        		const char *name, const glsl_type *type,
2874        		ir_constant *val)
2875{
2876   if (type->is_record()) {
2877      ir_constant *field_constant;
2878
2879      field_constant = (ir_constant *)val->components.get_head();
2880
2881      for (unsigned int i = 0; i < type->length; i++) {
2882         const glsl_type *field_type = type->fields.structure[i].type;
2883         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2884        				    type->fields.structure[i].name);
2885         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2886        			 field_type, field_constant);
2887         field_constant = (ir_constant *)field_constant->next;
2888      }
2889      return;
2890   }
2891
2892   int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2893
2894   if (loc == -1) {
2895      fail_link(shader_program,
2896        	"Couldn't find uniform for initializer %s\n", name);
2897      return;
2898   }
2899
2900   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2901      ir_constant *element;
2902      const glsl_type *element_type;
2903      if (type->is_array()) {
2904         element = val->array_elements[i];
2905         element_type = type->fields.array;
2906      } else {
2907         element = val;
2908         element_type = type;
2909      }
2910
2911      void *values;
2912
2913      if (element_type->base_type == GLSL_TYPE_BOOL) {
2914         int *conv = ralloc_array(mem_ctx, int, element_type->components());
2915         for (unsigned int j = 0; j < element_type->components(); j++) {
2916            conv[j] = element->value.b[j];
2917         }
2918         values = (void *)conv;
2919         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2920        					element_type->vector_elements,
2921        					1);
2922      } else {
2923         values = &element->value;
2924      }
2925
2926      if (element_type->is_matrix()) {
2927         _mesa_uniform_matrix(ctx, shader_program,
2928        		      element_type->matrix_columns,
2929        		      element_type->vector_elements,
2930        		      loc, 1, GL_FALSE, (GLfloat *)values);
2931      } else {
2932         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2933        	       values, element_type->gl_type);
2934      }
2935
2936      loc++;
2937   }
2938}
2939
2940/**
2941 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
2942 * are read from the given src in this instruction
2943 */
2944static int
2945get_src_arg_mask(st_dst_reg dst, st_src_reg src)
2946{
2947   int read_mask = 0, comp;
2948
2949   /* Now, given the src swizzle and the written channels, find which
2950    * components are actually read
2951    */
2952   for (comp = 0; comp < 4; ++comp) {
2953      const unsigned coord = GET_SWZ(src.swizzle, comp);
2954      ASSERT(coord < 4);
2955      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
2956         read_mask |= 1 << coord;
2957   }
2958
2959   return read_mask;
2960}
2961
2962/**
2963 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
2964 * instruction is the first instruction to write to register T0.  There are
2965 * several lowering passes done in GLSL IR (e.g. branches and
2966 * relative addressing) that create a large number of conditional assignments
2967 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
2968 *
2969 * Here is why this conversion is safe:
2970 * CMP T0, T1 T2 T0 can be expanded to:
2971 * if (T1 < 0.0)
2972 * 	MOV T0, T2;
2973 * else
2974 * 	MOV T0, T0;
2975 *
2976 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
2977 * as the original program.  If (T1 < 0.0) evaluates to false, executing
2978 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
2979 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
2980 * because any instruction that was going to read from T0 after this was going
2981 * to read a garbage value anyway.
2982 */
2983void
2984glsl_to_tgsi_visitor::simplify_cmp(void)
2985{
2986   unsigned *tempWrites;
2987   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
2988
2989   tempWrites = new unsigned[MAX_TEMPS];
2990   if (!tempWrites) {
2991      return;
2992   }
2993   memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
2994   memset(outputWrites, 0, sizeof(outputWrites));
2995
2996   foreach_iter(exec_list_iterator, iter, this->instructions) {
2997      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2998      unsigned prevWriteMask = 0;
2999
3000      /* Give up if we encounter relative addressing or flow control. */
3001      if (inst->dst.reladdr ||
3002          tgsi_get_opcode_info(inst->op)->is_branch ||
3003          inst->op == TGSI_OPCODE_BGNSUB ||
3004          inst->op == TGSI_OPCODE_CONT ||
3005          inst->op == TGSI_OPCODE_END ||
3006          inst->op == TGSI_OPCODE_ENDSUB ||
3007          inst->op == TGSI_OPCODE_RET) {
3008         break;
3009      }
3010
3011      if (inst->dst.file == PROGRAM_OUTPUT) {
3012         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
3013         prevWriteMask = outputWrites[inst->dst.index];
3014         outputWrites[inst->dst.index] |= inst->dst.writemask;
3015      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
3016         assert(inst->dst.index < MAX_TEMPS);
3017         prevWriteMask = tempWrites[inst->dst.index];
3018         tempWrites[inst->dst.index] |= inst->dst.writemask;
3019      }
3020
3021      /* For a CMP to be considered a conditional write, the destination
3022       * register and source register two must be the same. */
3023      if (inst->op == TGSI_OPCODE_CMP
3024          && !(inst->dst.writemask & prevWriteMask)
3025          && inst->src[2].file == inst->dst.file
3026          && inst->src[2].index == inst->dst.index
3027          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
3028
3029         inst->op = TGSI_OPCODE_MOV;
3030         inst->src[0] = inst->src[1];
3031      }
3032   }
3033
3034   delete [] tempWrites;
3035}
3036
3037/* Replaces all references to a temporary register index with another index. */
3038void
3039glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3040{
3041   foreach_iter(exec_list_iterator, iter, this->instructions) {
3042      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3043      unsigned j;
3044
3045      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3046         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3047             inst->src[j].index == index) {
3048            inst->src[j].index = new_index;
3049         }
3050      }
3051
3052      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3053         inst->dst.index = new_index;
3054      }
3055   }
3056}
3057
3058int
3059glsl_to_tgsi_visitor::get_first_temp_read(int index)
3060{
3061   int depth = 0; /* loop depth */
3062   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3063   unsigned i = 0, j;
3064
3065   foreach_iter(exec_list_iterator, iter, this->instructions) {
3066      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3067
3068      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3069         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3070             inst->src[j].index == index) {
3071            return (depth == 0) ? i : loop_start;
3072         }
3073      }
3074
3075      if (inst->op == TGSI_OPCODE_BGNLOOP) {
3076         if(depth++ == 0)
3077            loop_start = i;
3078      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3079         if (--depth == 0)
3080            loop_start = -1;
3081      }
3082      assert(depth >= 0);
3083
3084      i++;
3085   }
3086
3087   return -1;
3088}
3089
3090int
3091glsl_to_tgsi_visitor::get_first_temp_write(int index)
3092{
3093   int depth = 0; /* loop depth */
3094   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3095   int i = 0;
3096
3097   foreach_iter(exec_list_iterator, iter, this->instructions) {
3098      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3099
3100      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3101         return (depth == 0) ? i : loop_start;
3102      }
3103
3104      if (inst->op == TGSI_OPCODE_BGNLOOP) {
3105         if(depth++ == 0)
3106            loop_start = i;
3107      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3108         if (--depth == 0)
3109            loop_start = -1;
3110      }
3111      assert(depth >= 0);
3112
3113      i++;
3114   }
3115
3116   return -1;
3117}
3118
3119int
3120glsl_to_tgsi_visitor::get_last_temp_read(int index)
3121{
3122   int depth = 0; /* loop depth */
3123   int last = -1; /* index of last instruction that reads the temporary */
3124   unsigned i = 0, j;
3125
3126   foreach_iter(exec_list_iterator, iter, this->instructions) {
3127      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3128
3129      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3130         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3131             inst->src[j].index == index) {
3132            last = (depth == 0) ? i : -2;
3133         }
3134      }
3135
3136      if (inst->op == TGSI_OPCODE_BGNLOOP)
3137         depth++;
3138      else if (inst->op == TGSI_OPCODE_ENDLOOP)
3139         if (--depth == 0 && last == -2)
3140            last = i;
3141      assert(depth >= 0);
3142
3143      i++;
3144   }
3145
3146   assert(last >= -1);
3147   return last;
3148}
3149
3150int
3151glsl_to_tgsi_visitor::get_last_temp_write(int index)
3152{
3153   int depth = 0; /* loop depth */
3154   int last = -1; /* index of last instruction that writes to the temporary */
3155   int i = 0;
3156
3157   foreach_iter(exec_list_iterator, iter, this->instructions) {
3158      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3159
3160      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3161         last = (depth == 0) ? i : -2;
3162
3163      if (inst->op == TGSI_OPCODE_BGNLOOP)
3164         depth++;
3165      else if (inst->op == TGSI_OPCODE_ENDLOOP)
3166         if (--depth == 0 && last == -2)
3167            last = i;
3168      assert(depth >= 0);
3169
3170      i++;
3171   }
3172
3173   assert(last >= -1);
3174   return last;
3175}
3176
3177/*
3178 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3179 * channels for copy propagation and updates following instructions to
3180 * use the original versions.
3181 *
3182 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3183 * will occur.  As an example, a TXP production before this pass:
3184 *
3185 * 0: MOV TEMP[1], INPUT[4].xyyy;
3186 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3187 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3188 *
3189 * and after:
3190 *
3191 * 0: MOV TEMP[1], INPUT[4].xyyy;
3192 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3193 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3194 *
3195 * which allows for dead code elimination on TEMP[1]'s writes.
3196 */
3197void
3198glsl_to_tgsi_visitor::copy_propagate(void)
3199{
3200   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3201        					    glsl_to_tgsi_instruction *,
3202        					    this->next_temp * 4);
3203   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3204   int level = 0;
3205
3206   foreach_iter(exec_list_iterator, iter, this->instructions) {
3207      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3208
3209      assert(inst->dst.file != PROGRAM_TEMPORARY
3210             || inst->dst.index < this->next_temp);
3211
3212      /* First, do any copy propagation possible into the src regs. */
3213      for (int r = 0; r < 3; r++) {
3214         glsl_to_tgsi_instruction *first = NULL;
3215         bool good = true;
3216         int acp_base = inst->src[r].index * 4;
3217
3218         if (inst->src[r].file != PROGRAM_TEMPORARY ||
3219             inst->src[r].reladdr)
3220            continue;
3221
3222         /* See if we can find entries in the ACP consisting of MOVs
3223          * from the same src register for all the swizzled channels
3224          * of this src register reference.
3225          */
3226         for (int i = 0; i < 4; i++) {
3227            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3228            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3229
3230            if (!copy_chan) {
3231               good = false;
3232               break;
3233            }
3234
3235            assert(acp_level[acp_base + src_chan] <= level);
3236
3237            if (!first) {
3238               first = copy_chan;
3239            } else {
3240               if (first->src[0].file != copy_chan->src[0].file ||
3241        	   first->src[0].index != copy_chan->src[0].index) {
3242        	  good = false;
3243        	  break;
3244               }
3245            }
3246         }
3247
3248         if (good) {
3249            /* We've now validated that we can copy-propagate to
3250             * replace this src register reference.  Do it.
3251             */
3252            inst->src[r].file = first->src[0].file;
3253            inst->src[r].index = first->src[0].index;
3254
3255            int swizzle = 0;
3256            for (int i = 0; i < 4; i++) {
3257               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3258               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3259               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3260        		   (3 * i));
3261            }
3262            inst->src[r].swizzle = swizzle;
3263         }
3264      }
3265
3266      switch (inst->op) {
3267      case TGSI_OPCODE_BGNLOOP:
3268      case TGSI_OPCODE_ENDLOOP:
3269         /* End of a basic block, clear the ACP entirely. */
3270         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3271         break;
3272
3273      case TGSI_OPCODE_IF:
3274         ++level;
3275         break;
3276
3277      case TGSI_OPCODE_ENDIF:
3278      case TGSI_OPCODE_ELSE:
3279         /* Clear all channels written inside the block from the ACP, but
3280          * leaving those that were not touched.
3281          */
3282         for (int r = 0; r < this->next_temp; r++) {
3283            for (int c = 0; c < 4; c++) {
3284               if (!acp[4 * r + c])
3285        	  continue;
3286
3287               if (acp_level[4 * r + c] >= level)
3288        	  acp[4 * r + c] = NULL;
3289            }
3290         }
3291         if (inst->op == TGSI_OPCODE_ENDIF)
3292            --level;
3293         break;
3294
3295      default:
3296         /* Continuing the block, clear any written channels from
3297          * the ACP.
3298          */
3299         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3300            /* Any temporary might be written, so no copy propagation
3301             * across this instruction.
3302             */
3303            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3304         } else if (inst->dst.file == PROGRAM_OUTPUT &&
3305        	    inst->dst.reladdr) {
3306            /* Any output might be written, so no copy propagation
3307             * from outputs across this instruction.
3308             */
3309            for (int r = 0; r < this->next_temp; r++) {
3310               for (int c = 0; c < 4; c++) {
3311        	  if (!acp[4 * r + c])
3312        	     continue;
3313
3314        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3315        	     acp[4 * r + c] = NULL;
3316               }
3317            }
3318         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3319        	    inst->dst.file == PROGRAM_OUTPUT) {
3320            /* Clear where it's used as dst. */
3321            if (inst->dst.file == PROGRAM_TEMPORARY) {
3322               for (int c = 0; c < 4; c++) {
3323        	  if (inst->dst.writemask & (1 << c)) {
3324        	     acp[4 * inst->dst.index + c] = NULL;
3325        	  }
3326               }
3327            }
3328
3329            /* Clear where it's used as src. */
3330            for (int r = 0; r < this->next_temp; r++) {
3331               for (int c = 0; c < 4; c++) {
3332        	  if (!acp[4 * r + c])
3333        	     continue;
3334
3335        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3336
3337        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3338        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
3339        	      inst->dst.writemask & (1 << src_chan))
3340        	  {
3341        	     acp[4 * r + c] = NULL;
3342        	  }
3343               }
3344            }
3345         }
3346         break;
3347      }
3348
3349      /* If this is a copy, add it to the ACP. */
3350      if (inst->op == TGSI_OPCODE_MOV &&
3351          inst->dst.file == PROGRAM_TEMPORARY &&
3352          !inst->dst.reladdr &&
3353          !inst->saturate &&
3354          !inst->src[0].reladdr &&
3355          !inst->src[0].negate) {
3356         for (int i = 0; i < 4; i++) {
3357            if (inst->dst.writemask & (1 << i)) {
3358               acp[4 * inst->dst.index + i] = inst;
3359               acp_level[4 * inst->dst.index + i] = level;
3360            }
3361         }
3362      }
3363   }
3364
3365   ralloc_free(acp_level);
3366   ralloc_free(acp);
3367}
3368
3369/*
3370 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3371 *
3372 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3373 * will occur.  As an example, a TXP production after copy propagation but
3374 * before this pass:
3375 *
3376 * 0: MOV TEMP[1], INPUT[4].xyyy;
3377 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3378 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3379 *
3380 * and after this pass:
3381 *
3382 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3383 *
3384 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3385 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3386 */
3387void
3388glsl_to_tgsi_visitor::eliminate_dead_code(void)
3389{
3390   int i;
3391
3392   for (i=0; i < this->next_temp; i++) {
3393      int last_read = get_last_temp_read(i);
3394      int j = 0;
3395
3396      foreach_iter(exec_list_iterator, iter, this->instructions) {
3397         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3398
3399         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3400             j > last_read)
3401         {
3402            iter.remove();
3403            delete inst;
3404         }
3405
3406         j++;
3407      }
3408   }
3409}
3410
3411/*
3412 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3413 * code elimination.  This is less primitive than eliminate_dead_code(), as it
3414 * is per-channel and can detect consecutive writes without a read between them
3415 * as dead code.  However, there is some dead code that can be eliminated by
3416 * eliminate_dead_code() but not this function - for example, this function
3417 * cannot eliminate an instruction writing to a register that is never read and
3418 * is the only instruction writing to that register.
3419 *
3420 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3421 * will occur.
3422 */
3423int
3424glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3425{
3426   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3427                                                     glsl_to_tgsi_instruction *,
3428                                                     this->next_temp * 4);
3429   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3430   int level = 0;
3431   int removed = 0;
3432
3433   foreach_iter(exec_list_iterator, iter, this->instructions) {
3434      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3435
3436      assert(inst->dst.file != PROGRAM_TEMPORARY
3437             || inst->dst.index < this->next_temp);
3438
3439      switch (inst->op) {
3440      case TGSI_OPCODE_BGNLOOP:
3441      case TGSI_OPCODE_ENDLOOP:
3442      case TGSI_OPCODE_CONT:
3443      case TGSI_OPCODE_BRK:
3444         /* End of a basic block, clear the write array entirely.
3445          *
3446          * This keeps us from killing dead code when the writes are
3447          * on either side of a loop, even when the register isn't touched
3448          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
3449          * dead code of this type, so it shouldn't make a difference as long as
3450          * the dead code elimination pass in the GLSL compiler does its job.
3451          */
3452         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3453         break;
3454
3455      case TGSI_OPCODE_ENDIF:
3456      case TGSI_OPCODE_ELSE:
3457         /* Promote the recorded level of all channels written inside the
3458          * preceding if or else block to the level above the if/else block.
3459          */
3460         for (int r = 0; r < this->next_temp; r++) {
3461            for (int c = 0; c < 4; c++) {
3462               if (!writes[4 * r + c])
3463        	         continue;
3464
3465               if (write_level[4 * r + c] == level)
3466        	         write_level[4 * r + c] = level-1;
3467            }
3468         }
3469
3470         if(inst->op == TGSI_OPCODE_ENDIF)
3471            --level;
3472
3473         break;
3474
3475      case TGSI_OPCODE_IF:
3476         ++level;
3477         /* fallthrough to default case to mark the condition as read */
3478
3479      default:
3480         /* Continuing the block, clear any channels from the write array that
3481          * are read by this instruction.
3482          */
3483         for (unsigned i = 0; i < Elements(inst->src); i++) {
3484            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3485               /* Any temporary might be read, so no dead code elimination
3486                * across this instruction.
3487                */
3488               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3489            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3490               /* Clear where it's used as src. */
3491               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3492               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3493               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3494               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3495
3496               for (int c = 0; c < 4; c++) {
3497              	   if (src_chans & (1 << c)) {
3498              	      writes[4 * inst->src[i].index + c] = NULL;
3499              	   }
3500               }
3501            }
3502         }
3503         break;
3504      }
3505
3506      /* If this instruction writes to a temporary, add it to the write array.
3507       * If there is already an instruction in the write array for one or more
3508       * of the channels, flag that channel write as dead.
3509       */
3510      if (inst->dst.file == PROGRAM_TEMPORARY &&
3511          !inst->dst.reladdr &&
3512          !inst->saturate) {
3513         for (int c = 0; c < 4; c++) {
3514            if (inst->dst.writemask & (1 << c)) {
3515               if (writes[4 * inst->dst.index + c]) {
3516                  if (write_level[4 * inst->dst.index + c] < level)
3517                     continue;
3518                  else
3519                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3520               }
3521               writes[4 * inst->dst.index + c] = inst;
3522               write_level[4 * inst->dst.index + c] = level;
3523            }
3524         }
3525      }
3526   }
3527
3528   /* Anything still in the write array at this point is dead code. */
3529   for (int r = 0; r < this->next_temp; r++) {
3530      for (int c = 0; c < 4; c++) {
3531         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3532         if (inst)
3533            inst->dead_mask |= (1 << c);
3534      }
3535   }
3536
3537   /* Now actually remove the instructions that are completely dead and update
3538    * the writemask of other instructions with dead channels.
3539    */
3540   foreach_iter(exec_list_iterator, iter, this->instructions) {
3541      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3542
3543      if (!inst->dead_mask || !inst->dst.writemask)
3544         continue;
3545      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
3546         iter.remove();
3547         delete inst;
3548         removed++;
3549      } else
3550         inst->dst.writemask &= ~(inst->dead_mask);
3551   }
3552
3553   ralloc_free(write_level);
3554   ralloc_free(writes);
3555
3556   return removed;
3557}
3558
3559/* Merges temporary registers together where possible to reduce the number of
3560 * registers needed to run a program.
3561 *
3562 * Produces optimal code only after copy propagation and dead code elimination
3563 * have been run. */
3564void
3565glsl_to_tgsi_visitor::merge_registers(void)
3566{
3567   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3568   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3569   int i, j;
3570
3571   /* Read the indices of the last read and first write to each temp register
3572    * into an array so that we don't have to traverse the instruction list as
3573    * much. */
3574   for (i=0; i < this->next_temp; i++) {
3575      last_reads[i] = get_last_temp_read(i);
3576      first_writes[i] = get_first_temp_write(i);
3577   }
3578
3579   /* Start looking for registers with non-overlapping usages that can be
3580    * merged together. */
3581   for (i=0; i < this->next_temp; i++) {
3582      /* Don't touch unused registers. */
3583      if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3584
3585      for (j=0; j < this->next_temp; j++) {
3586         /* Don't touch unused registers. */
3587         if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3588
3589         /* We can merge the two registers if the first write to j is after or
3590          * in the same instruction as the last read from i.  Note that the
3591          * register at index i will always be used earlier or at the same time
3592          * as the register at index j. */
3593         if (first_writes[i] <= first_writes[j] &&
3594             last_reads[i] <= first_writes[j])
3595         {
3596            rename_temp_register(j, i); /* Replace all references to j with i.*/
3597
3598            /* Update the first_writes and last_reads arrays with the new
3599             * values for the merged register index, and mark the newly unused
3600             * register index as such. */
3601            last_reads[i] = last_reads[j];
3602            first_writes[j] = -1;
3603            last_reads[j] = -1;
3604         }
3605      }
3606   }
3607
3608   ralloc_free(last_reads);
3609   ralloc_free(first_writes);
3610}
3611
3612/* Reassign indices to temporary registers by reusing unused indices created
3613 * by optimization passes. */
3614void
3615glsl_to_tgsi_visitor::renumber_registers(void)
3616{
3617   int i = 0;
3618   int new_index = 0;
3619
3620   for (i=0; i < this->next_temp; i++) {
3621      if (get_first_temp_read(i) < 0) continue;
3622      if (i != new_index)
3623         rename_temp_register(i, new_index);
3624      new_index++;
3625   }
3626
3627   this->next_temp = new_index;
3628}
3629
3630/**
3631 * Returns a fragment program which implements the current pixel transfer ops.
3632 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3633 */
3634extern "C" void
3635get_pixel_transfer_visitor(struct st_fragment_program *fp,
3636                           glsl_to_tgsi_visitor *original,
3637                           int scale_and_bias, int pixel_maps)
3638{
3639   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3640   struct st_context *st = st_context(original->ctx);
3641   struct gl_program *prog = &fp->Base.Base;
3642   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3643   st_src_reg coord, src0;
3644   st_dst_reg dst0;
3645   glsl_to_tgsi_instruction *inst;
3646
3647   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3648   v->ctx = original->ctx;
3649   v->prog = prog;
3650   v->shader_program = NULL;
3651   v->glsl_version = original->glsl_version;
3652   v->native_integers = original->native_integers;
3653   v->options = original->options;
3654   v->next_temp = original->next_temp;
3655   v->num_address_regs = original->num_address_regs;
3656   v->samplers_used = prog->SamplersUsed = original->samplers_used;
3657   v->indirect_addr_temps = original->indirect_addr_temps;
3658   v->indirect_addr_consts = original->indirect_addr_consts;
3659   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3660   v->num_immediates = original->num_immediates;
3661
3662   /*
3663    * Get initial pixel color from the texture.
3664    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3665    */
3666   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3667   src0 = v->get_temp(glsl_type::vec4_type);
3668   dst0 = st_dst_reg(src0);
3669   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3670   inst->sampler = 0;
3671   inst->tex_target = TEXTURE_2D_INDEX;
3672
3673   prog->InputsRead |= FRAG_BIT_TEX0;
3674   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3675   v->samplers_used |= (1 << 0);
3676
3677   if (scale_and_bias) {
3678      static const gl_state_index scale_state[STATE_LENGTH] =
3679         { STATE_INTERNAL, STATE_PT_SCALE,
3680           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3681      static const gl_state_index bias_state[STATE_LENGTH] =
3682         { STATE_INTERNAL, STATE_PT_BIAS,
3683           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3684      GLint scale_p, bias_p;
3685      st_src_reg scale, bias;
3686
3687      scale_p = _mesa_add_state_reference(params, scale_state);
3688      bias_p = _mesa_add_state_reference(params, bias_state);
3689
3690      /* MAD colorTemp, colorTemp, scale, bias; */
3691      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3692      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3693      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3694   }
3695
3696   if (pixel_maps) {
3697      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3698      st_dst_reg temp_dst = st_dst_reg(temp);
3699
3700      assert(st->pixel_xfer.pixelmap_texture);
3701
3702      /* With a little effort, we can do four pixel map look-ups with
3703       * two TEX instructions:
3704       */
3705
3706      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3707      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3708      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3709      inst->sampler = 1;
3710      inst->tex_target = TEXTURE_2D_INDEX;
3711
3712      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3713      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3714      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3715      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3716      inst->sampler = 1;
3717      inst->tex_target = TEXTURE_2D_INDEX;
3718
3719      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3720      v->samplers_used |= (1 << 1);
3721
3722      /* MOV colorTemp, temp; */
3723      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3724   }
3725
3726   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3727    * new visitor. */
3728   foreach_iter(exec_list_iterator, iter, original->instructions) {
3729      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3730      glsl_to_tgsi_instruction *newinst;
3731      st_src_reg src_regs[3];
3732
3733      if (inst->dst.file == PROGRAM_OUTPUT)
3734         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3735
3736      for (int i=0; i<3; i++) {
3737         src_regs[i] = inst->src[i];
3738         if (src_regs[i].file == PROGRAM_INPUT &&
3739             src_regs[i].index == FRAG_ATTRIB_COL0)
3740         {
3741            src_regs[i].file = PROGRAM_TEMPORARY;
3742            src_regs[i].index = src0.index;
3743         }
3744         else if (src_regs[i].file == PROGRAM_INPUT)
3745            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
3746      }
3747
3748      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3749      newinst->tex_target = inst->tex_target;
3750   }
3751
3752   /* Make modifications to fragment program info. */
3753   prog->Parameters = _mesa_combine_parameter_lists(params,
3754                                                    original->prog->Parameters);
3755   _mesa_free_parameter_list(params);
3756   count_resources(v, prog);
3757   fp->glsl_to_tgsi = v;
3758}
3759
3760/**
3761 * Make fragment program for glBitmap:
3762 *   Sample the texture and kill the fragment if the bit is 0.
3763 * This program will be combined with the user's fragment program.
3764 *
3765 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3766 */
3767extern "C" void
3768get_bitmap_visitor(struct st_fragment_program *fp,
3769                   glsl_to_tgsi_visitor *original, int samplerIndex)
3770{
3771   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3772   struct st_context *st = st_context(original->ctx);
3773   struct gl_program *prog = &fp->Base.Base;
3774   st_src_reg coord, src0;
3775   st_dst_reg dst0;
3776   glsl_to_tgsi_instruction *inst;
3777
3778   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3779   v->ctx = original->ctx;
3780   v->prog = prog;
3781   v->shader_program = NULL;
3782   v->glsl_version = original->glsl_version;
3783   v->native_integers = original->native_integers;
3784   v->options = original->options;
3785   v->next_temp = original->next_temp;
3786   v->num_address_regs = original->num_address_regs;
3787   v->samplers_used = prog->SamplersUsed = original->samplers_used;
3788   v->indirect_addr_temps = original->indirect_addr_temps;
3789   v->indirect_addr_consts = original->indirect_addr_consts;
3790   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3791   v->num_immediates = original->num_immediates;
3792
3793   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
3794   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3795   src0 = v->get_temp(glsl_type::vec4_type);
3796   dst0 = st_dst_reg(src0);
3797   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3798   inst->sampler = samplerIndex;
3799   inst->tex_target = TEXTURE_2D_INDEX;
3800
3801   prog->InputsRead |= FRAG_BIT_TEX0;
3802   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
3803   v->samplers_used |= (1 << samplerIndex);
3804
3805   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
3806   src0.negate = NEGATE_XYZW;
3807   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
3808      src0.swizzle = SWIZZLE_XXXX;
3809   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
3810
3811   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3812    * new visitor. */
3813   foreach_iter(exec_list_iterator, iter, original->instructions) {
3814      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3815      glsl_to_tgsi_instruction *newinst;
3816      st_src_reg src_regs[3];
3817
3818      if (inst->dst.file == PROGRAM_OUTPUT)
3819         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3820
3821      for (int i=0; i<3; i++) {
3822         src_regs[i] = inst->src[i];
3823         if (src_regs[i].file == PROGRAM_INPUT)
3824            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
3825      }
3826
3827      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3828      newinst->tex_target = inst->tex_target;
3829   }
3830
3831   /* Make modifications to fragment program info. */
3832   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
3833   count_resources(v, prog);
3834   fp->glsl_to_tgsi = v;
3835}
3836
/* ------------------------- TGSI conversion stuff -------------------------- */

/**
 * One pending label fix-up: a branch target recorded by get_label() together
 * with the storage for the token that refers to it.
 */
struct label {
   unsigned branch_target;   /* target passed to get_label() */
   unsigned token;           /* get_label() hands out a pointer to this */
};
3842
3843/**
3844 * Intermediate state used during shader translation.
3845 */
3846struct st_translate {
3847   struct ureg_program *ureg;
3848
3849   struct ureg_dst temps[MAX_TEMPS];
3850   struct ureg_src *constants;
3851   struct ureg_src *immediates;
3852   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3853   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3854   struct ureg_dst address[1];
3855   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3856   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3857
3858   const GLuint *inputMapping;
3859   const GLuint *outputMapping;
3860
3861   /* For every instruction that contains a label (eg CALL), keep
3862    * details so that we can go back afterwards and emit the correct
3863    * tgsi instruction number for each label.
3864    */
3865   struct label *labels;
3866   unsigned labels_size;
3867   unsigned labels_count;
3868
3869   /* Keep a record of the tgsi instruction number that each mesa
3870    * instruction starts at, will be used to fix up labels after
3871    * translation.
3872    */
3873   unsigned *insn;
3874   unsigned insn_size;
3875   unsigned insn_count;
3876
3877   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3878
3879   boolean error;
3880};
3881
3882/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3883static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
3884   TGSI_SEMANTIC_FACE,
3885   TGSI_SEMANTIC_VERTEXID,
3886   TGSI_SEMANTIC_INSTANCEID
3887};
3888
3889/**
3890 * Make note of a branch to a label in the TGSI code.
3891 * After we've emitted all instructions, we'll go over the list
3892 * of labels built here and patch the TGSI code with the actual
3893 * location of each label.
3894 */
3895static unsigned *get_label(struct st_translate *t, unsigned branch_target)
3896{
3897   unsigned i;
3898
3899   if (t->labels_count + 1 >= t->labels_size) {
3900      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
3901      t->labels = (struct label *)realloc(t->labels,
3902                                          t->labels_size * sizeof(struct label));
3903      if (t->labels == NULL) {
3904         static unsigned dummy;
3905         t->error = TRUE;
3906         return &dummy;
3907      }
3908   }
3909
3910   i = t->labels_count++;
3911   t->labels[i].branch_target = branch_target;
3912   return &t->labels[i].token;
3913}
3914
3915/**
3916 * Called prior to emitting the TGSI code for each instruction.
3917 * Allocate additional space for instructions if needed.
3918 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
3919 * the next TGSI instruction.
3920 */
3921static void set_insn_start(struct st_translate *t, unsigned start)
3922{
3923   if (t->insn_count + 1 >= t->insn_size) {
3924      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
3925      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
3926      if (t->insn == NULL) {
3927         t->error = TRUE;
3928         return;
3929      }
3930   }
3931
3932   t->insn[t->insn_count++] = start;
3933}
3934
3935/**
3936 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
3937 */
3938static struct ureg_src
3939emit_immediate(struct st_translate *t,
3940               gl_constant_value values[4],
3941               int type, int size)
3942{
3943   struct ureg_program *ureg = t->ureg;
3944
3945   switch(type)
3946   {
3947   case GL_FLOAT:
3948      return ureg_DECL_immediate(ureg, &values[0].f, size);
3949   case GL_INT:
3950      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
3951   case GL_UNSIGNED_INT:
3952   case GL_BOOL:
3953      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
3954   default:
3955      assert(!"should not get here - type must be float, int, uint, or bool");
3956      return ureg_src_undef();
3957   }
3958}
3959
3960/**
3961 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
3962 */
3963static struct ureg_dst
3964dst_register(struct st_translate *t,
3965             gl_register_file file,
3966             GLuint index)
3967{
3968   switch(file) {
3969   case PROGRAM_UNDEFINED:
3970      return ureg_dst_undef();
3971
3972   case PROGRAM_TEMPORARY:
3973      if (ureg_dst_is_undef(t->temps[index]))
3974         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
3975
3976      return t->temps[index];
3977
3978   case PROGRAM_OUTPUT:
3979      if (t->procType == TGSI_PROCESSOR_VERTEX)
3980         assert(index < VERT_RESULT_MAX);
3981      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
3982         assert(index < FRAG_RESULT_MAX);
3983      else
3984         assert(index < GEOM_RESULT_MAX);
3985
3986      assert(t->outputMapping[index] < Elements(t->outputs));
3987
3988      return t->outputs[t->outputMapping[index]];
3989
3990   case PROGRAM_ADDRESS:
3991      return t->address[index];
3992
3993   default:
3994      assert(!"unknown dst register file");
3995      return ureg_dst_undef();
3996   }
3997}
3998
3999/**
4000 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
4001 */
4002static struct ureg_src
4003src_register(struct st_translate *t,
4004             gl_register_file file,
4005             GLuint index)
4006{
4007   switch(file) {
4008   case PROGRAM_UNDEFINED:
4009      return ureg_src_undef();
4010
4011   case PROGRAM_TEMPORARY:
4012      assert(index >= 0);
4013      assert(index < Elements(t->temps));
4014      if (ureg_dst_is_undef(t->temps[index]))
4015         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
4016      return ureg_src(t->temps[index]);
4017
4018   case PROGRAM_NAMED_PARAM:
4019   case PROGRAM_ENV_PARAM:
4020   case PROGRAM_LOCAL_PARAM:
4021   case PROGRAM_UNIFORM:
4022      assert(index >= 0);
4023      return t->constants[index];
4024   case PROGRAM_STATE_VAR:
4025   case PROGRAM_CONSTANT:       /* ie, immediate */
4026      if (index < 0)
4027         return ureg_DECL_constant(t->ureg, 0);
4028      else
4029         return t->constants[index];
4030
4031   case PROGRAM_IMMEDIATE:
4032      return t->immediates[index];
4033
4034   case PROGRAM_INPUT:
4035      assert(t->inputMapping[index] < Elements(t->inputs));
4036      return t->inputs[t->inputMapping[index]];
4037
4038   case PROGRAM_OUTPUT:
4039      assert(t->outputMapping[index] < Elements(t->outputs));
4040      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
4041
4042   case PROGRAM_ADDRESS:
4043      return ureg_src(t->address[index]);
4044
4045   case PROGRAM_SYSTEM_VALUE:
4046      assert(index < Elements(t->systemValues));
4047      return t->systemValues[index];
4048
4049   default:
4050      assert(!"unknown src register file");
4051      return ureg_src_undef();
4052   }
4053}
4054
4055/**
4056 * Create a TGSI ureg_dst register from an st_dst_reg.
4057 */
4058static struct ureg_dst
4059translate_dst(struct st_translate *t,
4060              const st_dst_reg *dst_reg,
4061              bool saturate, bool clamp_color)
4062{
4063   struct ureg_dst dst = dst_register(t,
4064                                      dst_reg->file,
4065                                      dst_reg->index);
4066
4067   dst = ureg_writemask(dst, dst_reg->writemask);
4068
4069   if (saturate)
4070      dst = ureg_saturate(dst);
4071   else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
4072      /* Clamp colors for ARB_color_buffer_float. */
4073      switch (t->procType) {
4074      case TGSI_PROCESSOR_VERTEX:
4075         /* XXX if the geometry shader is present, this must be done there
4076          * instead of here. */
4077         if (dst_reg->index == VERT_RESULT_COL0 ||
4078             dst_reg->index == VERT_RESULT_COL1 ||
4079             dst_reg->index == VERT_RESULT_BFC0 ||
4080             dst_reg->index == VERT_RESULT_BFC1) {
4081            dst = ureg_saturate(dst);
4082         }
4083         break;
4084
4085      case TGSI_PROCESSOR_FRAGMENT:
4086         if (dst_reg->index >= FRAG_RESULT_COLOR) {
4087            dst = ureg_saturate(dst);
4088         }
4089         break;
4090      }
4091   }
4092
4093   if (dst_reg->reladdr != NULL)
4094      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
4095
4096   return dst;
4097}
4098
4099/**
4100 * Create a TGSI ureg_src register from an st_src_reg.
4101 */
4102static struct ureg_src
4103translate_src(struct st_translate *t, const st_src_reg *src_reg)
4104{
4105   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
4106
4107   src = ureg_swizzle(src,
4108                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
4109                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
4110                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
4111                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
4112
4113   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4114      src = ureg_negate(src);
4115
4116   if (src_reg->reladdr != NULL) {
4117      /* Normally ureg_src_indirect() would be used here, but a stupid compiler
4118       * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
4119       * set the bit for src.Negate.  So we have to do the operation manually
4120       * here to work around the compiler's problems. */
4121      /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
4122      struct ureg_src addr = ureg_src(t->address[0]);
4123      src.Indirect = 1;
4124      src.IndirectFile = addr.File;
4125      src.IndirectIndex = addr.Index;
4126      src.IndirectSwizzle = addr.SwizzleX;
4127
4128      if (src_reg->file != PROGRAM_INPUT &&
4129          src_reg->file != PROGRAM_OUTPUT) {
4130         /* If src_reg->index was negative, it was set to zero in
4131          * src_register().  Reassign it now.  But don't do this
4132          * for input/output regs since they get remapped while
4133          * const buffers don't.
4134          */
4135         src.Index = src_reg->index;
4136      }
4137   }
4138
4139   return src;
4140}
4141
4142static struct tgsi_texture_offset
4143translate_tex_offset(struct st_translate *t,
4144                     const struct tgsi_texture_offset *in_offset)
4145{
4146   struct tgsi_texture_offset offset;
4147
4148   assert(in_offset->File == PROGRAM_IMMEDIATE);
4149
4150   offset.File = TGSI_FILE_IMMEDIATE;
4151   offset.Index = in_offset->Index;
4152   offset.SwizzleX = in_offset->SwizzleX;
4153   offset.SwizzleY = in_offset->SwizzleY;
4154   offset.SwizzleZ = in_offset->SwizzleZ;
4155
4156   return offset;
4157}
4158
4159static void
4160compile_tgsi_instruction(struct st_translate *t,
4161                         const glsl_to_tgsi_instruction *inst,
4162                         bool clamp_dst_color_output)
4163{
4164   struct ureg_program *ureg = t->ureg;
4165   GLuint i;
4166   struct ureg_dst dst[1];
4167   struct ureg_src src[4];
4168   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
4169
4170   unsigned num_dst;
4171   unsigned num_src;
4172
4173   num_dst = num_inst_dst_regs(inst->op);
4174   num_src = num_inst_src_regs(inst->op);
4175
4176   if (num_dst)
4177      dst[0] = translate_dst(t,
4178                             &inst->dst,
4179                             inst->saturate,
4180                             clamp_dst_color_output);
4181
4182   for (i = 0; i < num_src; i++)
4183      src[i] = translate_src(t, &inst->src[i]);
4184
4185   switch(inst->op) {
4186   case TGSI_OPCODE_BGNLOOP:
4187   case TGSI_OPCODE_CAL:
4188   case TGSI_OPCODE_ELSE:
4189   case TGSI_OPCODE_ENDLOOP:
4190   case TGSI_OPCODE_IF:
4191      assert(num_dst == 0);
4192      ureg_label_insn(ureg,
4193                      inst->op,
4194                      src, num_src,
4195                      get_label(t,
4196                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4197      return;
4198
4199   case TGSI_OPCODE_TEX:
4200   case TGSI_OPCODE_TXB:
4201   case TGSI_OPCODE_TXD:
4202   case TGSI_OPCODE_TXL:
4203   case TGSI_OPCODE_TXP:
4204   case TGSI_OPCODE_TXQ:
4205   case TGSI_OPCODE_TXF:
4206      src[num_src++] = t->samplers[inst->sampler];
4207      for (i = 0; i < inst->tex_offset_num_offset; i++) {
4208         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
4209      }
4210      ureg_tex_insn(ureg,
4211                    inst->op,
4212                    dst, num_dst,
4213                    st_translate_texture_target(inst->tex_target, inst->tex_shadow),
4214                    texoffsets, inst->tex_offset_num_offset,
4215                    src, num_src);
4216      return;
4217
4218   case TGSI_OPCODE_SCS:
4219      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4220      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4221      break;
4222
4223   default:
4224      ureg_insn(ureg,
4225                inst->op,
4226                dst, num_dst,
4227                src, num_src);
4228      break;
4229   }
4230}
4231
4232/**
4233 * Emit the TGSI instructions for inverting and adjusting WPOS.
4234 * This code is unavoidable because it also depends on whether
4235 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4236 */
4237static void
4238emit_wpos_adjustment( struct st_translate *t,
4239                      const struct gl_program *program,
4240                      boolean invert,
4241                      GLfloat adjX, GLfloat adjY[2])
4242{
4243   struct ureg_program *ureg = t->ureg;
4244
4245   /* Fragment program uses fragment position input.
4246    * Need to replace instances of INPUT[WPOS] with temp T
4247    * where T = INPUT[WPOS] by y is inverted.
4248    */
4249   static const gl_state_index wposTransformState[STATE_LENGTH]
4250      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4251          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4252
4253   /* XXX: note we are modifying the incoming shader here!  Need to
4254    * do this before emitting the constant decls below, or this
4255    * will be missed:
4256    */
4257   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4258                                                       wposTransformState);
4259
4260   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
4261   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
4262   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4263
4264   /* First, apply the coordinate shift: */
4265   if (adjX || adjY[0] || adjY[1]) {
4266      if (adjY[0] != adjY[1]) {
4267         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
4268          * depending on whether inversion is actually going to be applied
4269          * or not, which is determined by testing against the inversion
4270          * state variable used below, which will be either +1 or -1.
4271          */
4272         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
4273
4274         ureg_CMP(ureg, adj_temp,
4275                  ureg_scalar(wpostrans, invert ? 2 : 0),
4276                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
4277                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
4278         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
4279      } else {
4280         ureg_ADD(ureg, wpos_temp, wpos_input,
4281                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
4282      }
4283      wpos_input = ureg_src(wpos_temp);
4284   } else {
4285      /* MOV wpos_temp, input[wpos]
4286       */
4287      ureg_MOV( ureg, wpos_temp, wpos_input );
4288   }
4289
4290   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
4291    * inversion/identity, or the other way around if we're drawing to an FBO.
4292    */
4293   if (invert) {
4294      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4295       */
4296      ureg_MAD( ureg,
4297                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
4298                wpos_input,
4299                ureg_scalar(wpostrans, 0),
4300                ureg_scalar(wpostrans, 1));
4301   } else {
4302      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4303       */
4304      ureg_MAD( ureg,
4305                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
4306                wpos_input,
4307                ureg_scalar(wpostrans, 2),
4308                ureg_scalar(wpostrans, 3));
4309   }
4310
4311   /* Use wpos_temp as position input from here on:
4312    */
4313   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4314}
4315
4316
4317/**
4318 * Emit fragment position/ooordinate code.
4319 */
4320static void
4321emit_wpos(struct st_context *st,
4322          struct st_translate *t,
4323          const struct gl_program *program,
4324          struct ureg_program *ureg)
4325{
4326   const struct gl_fragment_program *fp =
4327      (const struct gl_fragment_program *) program;
4328   struct pipe_screen *pscreen = st->pipe->screen;
4329   GLfloat adjX = 0.0f;
4330   GLfloat adjY[2] = { 0.0f, 0.0f };
4331   boolean invert = FALSE;
4332
4333   /* Query the pixel center conventions supported by the pipe driver and set
4334    * adjX, adjY to help out if it cannot handle the requested one internally.
4335    *
4336    * The bias of the y-coordinate depends on whether y-inversion takes place
4337    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
4338    * drawing to an FBO (causes additional inversion), and whether the the pipe
4339    * driver origin and the requested origin differ (the latter condition is
4340    * stored in the 'invert' variable).
4341    *
4342    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
4343    *
4344    * center shift only:
4345    * i -> h: +0.5
4346    * h -> i: -0.5
4347    *
4348    * inversion only:
4349    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
4350    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
4351    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
4352    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
4353    *
4354    * inversion and center shift:
4355    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
4356    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
4357    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
4358    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
4359    */
4360   if (fp->OriginUpperLeft) {
4361      /* Fragment shader wants origin in upper-left */
4362      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4363         /* the driver supports upper-left origin */
4364      }
4365      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4366         /* the driver supports lower-left origin, need to invert Y */
4367         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4368         invert = TRUE;
4369      }
4370      else
4371         assert(0);
4372   }
4373   else {
4374      /* Fragment shader wants origin in lower-left */
4375      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4376         /* the driver supports lower-left origin */
4377         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4378      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4379         /* the driver supports upper-left origin, need to invert Y */
4380         invert = TRUE;
4381      else
4382         assert(0);
4383   }
4384
4385   if (fp->PixelCenterInteger) {
4386      /* Fragment shader wants pixel center integer */
4387      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4388         /* the driver supports pixel center integer */
4389         adjY[1] = 1.0f;
4390         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4391      }
4392      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4393         /* the driver supports pixel center half integer, need to bias X,Y */
4394         adjX = -0.5f;
4395         adjY[0] = -0.5f;
4396         adjY[1] = 0.5f;
4397      }
4398      else
4399         assert(0);
4400   }
4401   else {
4402      /* Fragment shader wants pixel center half integer */
4403      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4404         /* the driver supports pixel center half integer */
4405      }
4406      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4407         /* the driver supports pixel center integer, need to bias X,Y */
4408         adjX = adjY[0] = adjY[1] = 0.5f;
4409         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4410      }
4411      else
4412         assert(0);
4413   }
4414
4415   /* we invert after adjustment so that we avoid the MOV to temporary,
4416    * and reuse the adjustment ADD instead */
4417   emit_wpos_adjustment(t, program, invert, adjX, adjY);
4418}
4419
4420/**
4421 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4422 * TGSI uses +1 for front, -1 for back.
4423 * This function converts the TGSI value to the GL value.  Simply clamping/
4424 * saturating the value to [0,1] does the job.
4425 */
4426static void
4427emit_face_var(struct st_translate *t)
4428{
4429   struct ureg_program *ureg = t->ureg;
4430   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4431   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
4432
4433   /* MOV_SAT face_temp, input[face] */
4434   face_temp = ureg_saturate(face_temp);
4435   ureg_MOV(ureg, face_temp, face_input);
4436
4437   /* Use face_temp as face input from here on: */
4438   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
4439}
4440
4441static void
4442emit_edgeflags(struct st_translate *t)
4443{
4444   struct ureg_program *ureg = t->ureg;
4445   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
4446   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4447
4448   ureg_MOV(ureg, edge_dst, edge_src);
4449}
4450
4451/**
4452 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4453 * \param program  the program to translate
4454 * \param numInputs  number of input registers used
4455 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
4456 *                      input indexes
4457 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
4458 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
4459 *                            each input
4460 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4461 * \param numOutputs  number of output registers used
4462 * \param outputMapping  maps Mesa fragment program outputs to TGSI
4463 *                       generic outputs
4464 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
4465 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
4466 *                             each output
4467 *
4468 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4469 */
4470extern "C" enum pipe_error
4471st_translate_program(
4472   struct gl_context *ctx,
4473   uint procType,
4474   struct ureg_program *ureg,
4475   glsl_to_tgsi_visitor *program,
4476   const struct gl_program *proginfo,
4477   GLuint numInputs,
4478   const GLuint inputMapping[],
4479   const ubyte inputSemanticName[],
4480   const ubyte inputSemanticIndex[],
4481   const GLuint interpMode[],
4482   GLuint numOutputs,
4483   const GLuint outputMapping[],
4484   const ubyte outputSemanticName[],
4485   const ubyte outputSemanticIndex[],
4486   boolean passthrough_edgeflags,
4487   boolean clamp_color)
4488{
4489   struct st_translate *t;
4490   unsigned i;
4491   enum pipe_error ret = PIPE_OK;
4492
4493   assert(numInputs <= Elements(t->inputs));
4494   assert(numOutputs <= Elements(t->outputs));
4495
4496   t = CALLOC_STRUCT(st_translate);
4497   if (!t) {
4498      ret = PIPE_ERROR_OUT_OF_MEMORY;
4499      goto out;
4500   }
4501
4502   memset(t, 0, sizeof *t);
4503
4504   t->procType = procType;
4505   t->inputMapping = inputMapping;
4506   t->outputMapping = outputMapping;
4507   t->ureg = ureg;
4508
4509   if (program->shader_program) {
4510      for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
4511         struct gl_uniform_storage *const storage =
4512               &program->shader_program->UniformStorage[i];
4513
4514         _mesa_uniform_detach_all_driver_storage(storage);
4515      }
4516   }
4517
4518   /*
4519    * Declare input attributes.
4520    */
4521   if (procType == TGSI_PROCESSOR_FRAGMENT) {
4522      for (i = 0; i < numInputs; i++) {
4523         t->inputs[i] = ureg_DECL_fs_input(ureg,
4524                                           inputSemanticName[i],
4525                                           inputSemanticIndex[i],
4526                                           interpMode[i]);
4527      }
4528
4529      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
4530         /* Must do this after setting up t->inputs, and before
4531          * emitting constant references, below:
4532          */
4533          emit_wpos(st_context(ctx), t, proginfo, ureg);
4534      }
4535
4536      if (proginfo->InputsRead & FRAG_BIT_FACE)
4537         emit_face_var(t);
4538
4539      /*
4540       * Declare output attributes.
4541       */
4542      for (i = 0; i < numOutputs; i++) {
4543         switch (outputSemanticName[i]) {
4544         case TGSI_SEMANTIC_POSITION:
4545            t->outputs[i] = ureg_DECL_output(ureg,
4546                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
4547                                             outputSemanticIndex[i]);
4548            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4549            break;
4550         case TGSI_SEMANTIC_STENCIL:
4551            t->outputs[i] = ureg_DECL_output(ureg,
4552                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
4553                                             outputSemanticIndex[i]);
4554            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4555            break;
4556         case TGSI_SEMANTIC_COLOR:
4557            t->outputs[i] = ureg_DECL_output(ureg,
4558                                             TGSI_SEMANTIC_COLOR,
4559                                             outputSemanticIndex[i]);
4560            break;
4561         default:
4562            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4563            ret = PIPE_ERROR_BAD_INPUT;
4564            goto out;
4565         }
4566      }
4567   }
4568   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4569      for (i = 0; i < numInputs; i++) {
4570         t->inputs[i] = ureg_DECL_gs_input(ureg,
4571                                           i,
4572                                           inputSemanticName[i],
4573                                           inputSemanticIndex[i]);
4574      }
4575
4576      for (i = 0; i < numOutputs; i++) {
4577         t->outputs[i] = ureg_DECL_output(ureg,
4578                                          outputSemanticName[i],
4579                                          outputSemanticIndex[i]);
4580      }
4581   }
4582   else {
4583      assert(procType == TGSI_PROCESSOR_VERTEX);
4584
4585      for (i = 0; i < numInputs; i++) {
4586         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4587      }
4588
4589      for (i = 0; i < numOutputs; i++) {
4590         if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) {
4591            int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW;
4592            t->outputs[i] = ureg_DECL_output_masked(ureg,
4593                                                    outputSemanticName[i],
4594                                                    outputSemanticIndex[i],
4595                                                    mask);
4596         } else {
4597            t->outputs[i] = ureg_DECL_output(ureg,
4598                                             outputSemanticName[i],
4599                                             outputSemanticIndex[i]);
4600         }
4601      }
4602      if (passthrough_edgeflags)
4603         emit_edgeflags(t);
4604   }
4605
4606   /* Declare address register.
4607    */
4608   if (program->num_address_regs > 0) {
4609      assert(program->num_address_regs == 1);
4610      t->address[0] = ureg_DECL_address(ureg);
4611   }
4612
4613   /* Declare misc input registers
4614    */
4615   {
4616      GLbitfield sysInputs = proginfo->SystemValuesRead;
4617      unsigned numSys = 0;
4618      for (i = 0; sysInputs; i++) {
4619         if (sysInputs & (1 << i)) {
4620            unsigned semName = mesa_sysval_to_semantic[i];
4621            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4622            numSys++;
4623            sysInputs &= ~(1 << i);
4624         }
4625      }
4626   }
4627
4628   if (program->indirect_addr_temps) {
4629      /* If temps are accessed with indirect addressing, declare temporaries
4630       * in sequential order.  Else, we declare them on demand elsewhere.
4631       * (Note: the number of temporaries is equal to program->next_temp)
4632       */
4633      for (i = 0; i < (unsigned)program->next_temp; i++) {
4634         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4635         t->temps[i] = ureg_DECL_local_temporary(t->ureg);
4636      }
4637   }
4638
4639   /* Emit constants and uniforms.  TGSI uses a single index space for these,
4640    * so we put all the translated regs in t->constants.
4641    */
4642   if (proginfo->Parameters) {
4643      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
4644      if (t->constants == NULL) {
4645         ret = PIPE_ERROR_OUT_OF_MEMORY;
4646         goto out;
4647      }
4648
4649      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4650         switch (proginfo->Parameters->Parameters[i].Type) {
4651         case PROGRAM_ENV_PARAM:
4652         case PROGRAM_LOCAL_PARAM:
4653         case PROGRAM_STATE_VAR:
4654         case PROGRAM_NAMED_PARAM:
4655         case PROGRAM_UNIFORM:
4656            t->constants[i] = ureg_DECL_constant(ureg, i);
4657            break;
4658
4659         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4660          * addressing of the const buffer.
4661          * FIXME: Be smarter and recognize param arrays:
4662          * indirect addressing is only valid within the referenced
4663          * array.
4664          */
4665         case PROGRAM_CONSTANT:
4666            if (program->indirect_addr_consts)
4667               t->constants[i] = ureg_DECL_constant(ureg, i);
4668            else
4669               t->constants[i] = emit_immediate(t,
4670                                                proginfo->Parameters->ParameterValues[i],
4671                                                proginfo->Parameters->Parameters[i].DataType,
4672                                                4);
4673            break;
4674         default:
4675            break;
4676         }
4677      }
4678   }
4679
4680   /* Emit immediate values.
4681    */
4682   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
4683   if (t->immediates == NULL) {
4684      ret = PIPE_ERROR_OUT_OF_MEMORY;
4685      goto out;
4686   }
4687   i = 0;
4688   foreach_iter(exec_list_iterator, iter, program->immediates) {
4689      immediate_storage *imm = (immediate_storage *)iter.get();
4690      assert(i < program->num_immediates);
4691      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
4692   }
4693   assert(i == program->num_immediates);
4694
4695   /* texture samplers */
4696   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4697      if (program->samplers_used & (1 << i)) {
4698         t->samplers[i] = ureg_DECL_sampler(ureg, i);
4699      }
4700   }
4701
4702   /* Emit each instruction in turn:
4703    */
4704   foreach_iter(exec_list_iterator, iter, program->instructions) {
4705      set_insn_start(t, ureg_get_instruction_number(ureg));
4706      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
4707                               clamp_color);
4708   }
4709
4710   /* Fix up all emitted labels:
4711    */
4712   for (i = 0; i < t->labels_count; i++) {
4713      ureg_fixup_label(ureg, t->labels[i].token,
4714                       t->insn[t->labels[i].branch_target]);
4715   }
4716
4717   if (program->shader_program) {
4718      /* This has to be done last.  Any operation the can cause
4719       * prog->ParameterValues to get reallocated (e.g., anything that adds a
4720       * program constant) has to happen before creating this linkage.
4721       */
4722      for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4723         if (program->shader_program->_LinkedShaders[i] == NULL)
4724            continue;
4725
4726         _mesa_associate_uniform_storage(ctx, program->shader_program,
4727               program->shader_program->_LinkedShaders[i]->Program->Parameters);
4728      }
4729   }
4730
4731out:
4732   if (t) {
4733      FREE(t->insn);
4734      FREE(t->labels);
4735      FREE(t->constants);
4736      FREE(t->immediates);
4737
4738      if (t->error) {
4739         debug_printf("%s: translate error flag set\n", __FUNCTION__);
4740      }
4741
4742      FREE(t);
4743   }
4744
4745   return ret;
4746}
4747/* ----------------------------- End TGSI code ------------------------------ */
4748
4749/**
4750 * Convert a shader's GLSL IR into a Mesa gl_program, although without
4751 * generating Mesa IR.
4752 */
4753static struct gl_program *
4754get_mesa_program(struct gl_context *ctx,
4755                 struct gl_shader_program *shader_program,
4756                 struct gl_shader *shader,
4757                 int num_clip_distances)
4758{
4759   glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4760   struct gl_program *prog;
4761   GLenum target;
4762   const char *target_string;
4763   bool progress;
4764   struct gl_shader_compiler_options *options =
4765         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4766
4767   switch (shader->Type) {
4768   case GL_VERTEX_SHADER:
4769      target = GL_VERTEX_PROGRAM_ARB;
4770      target_string = "vertex";
4771      break;
4772   case GL_FRAGMENT_SHADER:
4773      target = GL_FRAGMENT_PROGRAM_ARB;
4774      target_string = "fragment";
4775      break;
4776   case GL_GEOMETRY_SHADER:
4777      target = GL_GEOMETRY_PROGRAM_NV;
4778      target_string = "geometry";
4779      break;
4780   default:
4781      assert(!"should not be reached");
4782      return NULL;
4783   }
4784
4785   validate_ir_tree(shader->ir);
4786
4787   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4788   if (!prog)
4789      return NULL;
4790   prog->Parameters = _mesa_new_parameter_list();
4791   v->ctx = ctx;
4792   v->prog = prog;
4793   v->shader_program = shader_program;
4794   v->options = options;
4795   v->glsl_version = ctx->Const.GLSLVersion;
4796   v->native_integers = ctx->Const.NativeIntegers;
4797   v->num_clip_distances = num_clip_distances;
4798
4799   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
4800					       prog->Parameters);
4801
4802   /* Remove reads from output registers. */
4803   lower_output_reads(shader->ir);
4804
4805   /* Emit intermediate IR for main(). */
4806   visit_exec_list(shader->ir, v);
4807
4808   /* Now emit bodies for any functions that were used. */
4809   do {
4810      progress = GL_FALSE;
4811
4812      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4813         function_entry *entry = (function_entry *)iter.get();
4814
4815         if (!entry->bgn_inst) {
4816            v->current_function = entry;
4817
4818            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4819            entry->bgn_inst->function = entry;
4820
4821            visit_exec_list(&entry->sig->body, v);
4822
4823            glsl_to_tgsi_instruction *last;
4824            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4825            if (last->op != TGSI_OPCODE_RET)
4826               v->emit(NULL, TGSI_OPCODE_RET);
4827
4828            glsl_to_tgsi_instruction *end;
4829            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4830            end->function = entry;
4831
4832            progress = GL_TRUE;
4833         }
4834      }
4835   } while (progress);
4836
4837#if 0
4838   /* Print out some information (for debugging purposes) used by the
4839    * optimization passes. */
4840   for (i=0; i < v->next_temp; i++) {
4841      int fr = v->get_first_temp_read(i);
4842      int fw = v->get_first_temp_write(i);
4843      int lr = v->get_last_temp_read(i);
4844      int lw = v->get_last_temp_write(i);
4845
4846      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4847      assert(fw <= fr);
4848   }
4849#endif
4850
4851   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
4852   v->simplify_cmp();
4853   v->copy_propagate();
4854   while (v->eliminate_dead_code_advanced());
4855
4856   /* FIXME: These passes to optimize temporary registers don't work when there
4857    * is indirect addressing of the temporary register space.  We need proper
4858    * array support so that we don't have to give up these passes in every
4859    * shader that uses arrays.
4860    */
4861   if (!v->indirect_addr_temps) {
4862      v->eliminate_dead_code();
4863      v->merge_registers();
4864      v->renumber_registers();
4865   }
4866
4867   /* Write the END instruction. */
4868   v->emit(NULL, TGSI_OPCODE_END);
4869
4870   if (ctx->Shader.Flags & GLSL_DUMP) {
4871      printf("\n");
4872      printf("GLSL IR for linked %s program %d:\n", target_string,
4873             shader_program->Name);
4874      _mesa_print_ir(shader->ir, NULL);
4875      printf("\n");
4876      printf("\n");
4877      fflush(stdout);
4878   }
4879
4880   prog->Instructions = NULL;
4881   prog->NumInstructions = 0;
4882
4883   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
4884   count_resources(v, prog);
4885
4886   _mesa_reference_program(ctx, &shader->Program, prog);
4887
4888   /* This has to be done last.  Any operation the can cause
4889    * prog->ParameterValues to get reallocated (e.g., anything that adds a
4890    * program constant) has to happen before creating this linkage.
4891    */
4892   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
4893   if (!shader_program->LinkStatus) {
4894      return NULL;
4895   }
4896
4897   struct st_vertex_program *stvp;
4898   struct st_fragment_program *stfp;
4899   struct st_geometry_program *stgp;
4900
4901   switch (shader->Type) {
4902   case GL_VERTEX_SHADER:
4903      stvp = (struct st_vertex_program *)prog;
4904      stvp->glsl_to_tgsi = v;
4905      break;
4906   case GL_FRAGMENT_SHADER:
4907      stfp = (struct st_fragment_program *)prog;
4908      stfp->glsl_to_tgsi = v;
4909      break;
4910   case GL_GEOMETRY_SHADER:
4911      stgp = (struct st_geometry_program *)prog;
4912      stgp->glsl_to_tgsi = v;
4913      break;
4914   default:
4915      assert(!"should not be reached");
4916      return NULL;
4917   }
4918
4919   return prog;
4920}
4921
4922/**
4923 * Searches through the IR for a declaration of gl_ClipDistance and returns the
4924 * declared size of the gl_ClipDistance array.  Returns 0 if gl_ClipDistance is
4925 * not declared in the IR.
4926 */
4927int get_clip_distance_size(exec_list *ir)
4928{
4929   foreach_iter (exec_list_iterator, iter, *ir) {
4930      ir_instruction *inst = (ir_instruction *)iter.get();
4931      ir_variable *var = inst->as_variable();
4932      if (var == NULL) continue;
4933      if (!strcmp(var->name, "gl_ClipDistance")) {
4934         return var->type->length;
4935      }
4936   }
4937
4938   return 0;
4939}
4940
4941extern "C" {
4942
4943struct gl_shader *
4944st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4945{
4946   struct gl_shader *shader;
4947   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4948          type == GL_GEOMETRY_SHADER_ARB);
4949   shader = rzalloc(NULL, struct gl_shader);
4950   if (shader) {
4951      shader->Type = type;
4952      shader->Name = name;
4953      _mesa_init_shader(ctx, shader);
4954   }
4955   return shader;
4956}
4957
4958struct gl_shader_program *
4959st_new_shader_program(struct gl_context *ctx, GLuint name)
4960{
4961   struct gl_shader_program *shProg;
4962   shProg = rzalloc(NULL, struct gl_shader_program);
4963   if (shProg) {
4964      shProg->Name = name;
4965      _mesa_init_shader_program(ctx, shProg);
4966   }
4967   return shProg;
4968}
4969
4970/**
4971 * Link a shader.
4972 * Called via ctx->Driver.LinkShader()
4973 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
4974 * with code lowering and other optimizations.
4975 */
4976GLboolean
4977st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4978{
4979   int num_clip_distances[MESA_SHADER_TYPES];
4980   assert(prog->LinkStatus);
4981
4982   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4983      if (prog->_LinkedShaders[i] == NULL)
4984         continue;
4985
4986      bool progress;
4987      exec_list *ir = prog->_LinkedShaders[i]->ir;
4988      const struct gl_shader_compiler_options *options =
4989            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
4990
4991      /* We have to determine the length of the gl_ClipDistance array before
4992       * the array is lowered to two vec4s by lower_clip_distance().
4993       */
4994      num_clip_distances[i] = get_clip_distance_size(ir);
4995
4996      do {
4997         unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP |
4998            EXP_TO_EXP2 | LOG_TO_LOG2;
4999         if (options->EmitNoPow)
5000            what_to_lower |= POW_TO_EXP2;
5001         if (!ctx->Const.NativeIntegers)
5002            what_to_lower |= INT_DIV_TO_MUL_RCP;
5003
5004         progress = false;
5005
5006         /* Lowering */
5007         do_mat_op_to_vec(ir);
5008         lower_instructions(ir, what_to_lower);
5009
5010         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
5011
5012         progress = do_common_optimization(ir, true, true,
5013					   options->MaxUnrollIterations)
5014	   || progress;
5015
5016         progress = lower_quadop_vector(ir, false) || progress;
5017         progress = lower_clip_distance(ir) || progress;
5018
5019         if (options->MaxIfDepth == 0)
5020            progress = lower_discard(ir) || progress;
5021
5022         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
5023
5024         if (options->EmitNoNoise)
5025            progress = lower_noise(ir) || progress;
5026
5027         /* If there are forms of indirect addressing that the driver
5028          * cannot handle, perform the lowering pass.
5029          */
5030         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
5031             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
5032           progress =
5033             lower_variable_index_to_cond_assign(ir,
5034        					 options->EmitNoIndirectInput,
5035        					 options->EmitNoIndirectOutput,
5036        					 options->EmitNoIndirectTemp,
5037        					 options->EmitNoIndirectUniform)
5038             || progress;
5039
5040         progress = do_vec_index_to_cond_assign(ir) || progress;
5041      } while (progress);
5042
5043      validate_ir_tree(ir);
5044   }
5045
5046   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5047      struct gl_program *linked_prog;
5048
5049      if (prog->_LinkedShaders[i] == NULL)
5050         continue;
5051
5052      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
5053                                     num_clip_distances[i]);
5054
5055      if (linked_prog) {
5056	 static const GLenum targets[] = {
5057	    GL_VERTEX_PROGRAM_ARB,
5058	    GL_FRAGMENT_PROGRAM_ARB,
5059	    GL_GEOMETRY_PROGRAM_NV
5060	 };
5061
5062	 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5063				 linked_prog);
5064         if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
5065	    _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5066				    NULL);
5067            _mesa_reference_program(ctx, &linked_prog, NULL);
5068            return GL_FALSE;
5069         }
5070      }
5071
5072      _mesa_reference_program(ctx, &linked_prog, NULL);
5073   }
5074
5075   return GL_TRUE;
5076}
5077
5078void
5079st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
5080                                const GLuint outputMapping[],
5081                                struct pipe_stream_output_info *so)
5082{
5083   unsigned i;
5084   struct gl_transform_feedback_info *info =
5085      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
5086
5087   for (i = 0; i < info->NumOutputs; i++) {
5088      so->output[i].register_index =
5089         outputMapping[info->Outputs[i].OutputRegister];
5090      so->output[i].start_component = info->Outputs[i].ComponentOffset;
5091      so->output[i].num_components = info->Outputs[i].NumComponents;
5092      so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
5093      so->output[i].dst_offset = info->Outputs[i].DstOffset;
5094   }
5095
5096   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
5097      so->stride[i] = info->BufferStride[i];
5098   }
5099   so->num_outputs = info->NumOutputs;
5100}
5101
5102} /* extern "C" */
5103