brw_vec4_visitor.cpp revision 905f3d03090c7b86e410959c5640054f5f6894ef
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
231510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
241510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org#include "brw_vec4.h"
251510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgextern "C" {
261510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org#include "main/macros.h"
271510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org#include "program/prog_parameter.h"
281510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
291510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
301510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgnamespace brw {
311510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
321510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgsrc_reg::src_reg(dst_reg reg)
331510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
341510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   init();
351510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
361510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->file = reg.file;
371510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->reg = reg.reg;
381510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->reg_offset = reg.reg_offset;
391510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->type = reg.type;
401510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->reladdr = reg.reladdr;
411510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
421510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   int swizzles[4];
431510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   int next_chan = 0;
441510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   int last = 0;
451510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
461510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   for (int i = 0; i < 4; i++) {
471510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      if (!(reg.writemask & (1 << i)))
481510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 continue;
491510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
501510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      swizzles[next_chan++] = last = i;
511510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
521510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
531510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   for (; next_chan < 4; next_chan++) {
541510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      swizzles[next_chan] = last;
551510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
561510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
571510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
581510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org				swizzles[2], swizzles[3]);
591510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
601510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
611510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgdst_reg::dst_reg(src_reg reg)
621510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
631510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   init();
641510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
65169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   this->file = reg.file;
66169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   this->reg = reg.reg;
67169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   this->reg_offset = reg.reg_offset;
68169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   this->type = reg.type;
691510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->writemask = WRITEMASK_XYZW;
701510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->reladdr = reg.reladdr;
711510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
721510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
731510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_instruction *
741510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit(enum opcode opcode, dst_reg dst,
751510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org		   src_reg src0, src_reg src1, src_reg src2)
761510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
771510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   vec4_instruction *inst = new(mem_ctx) vec4_instruction();
78169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org
79169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   inst->opcode = opcode;
80169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   inst->dst = dst;
81169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   inst->src[0] = src0;
821510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   inst->src[1] = src1;
831510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   inst->src[2] = src2;
841510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   inst->ir = this->base_ir;
851510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   inst->annotation = this->current_annotation;
861510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
871510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->instructions.push_tail(inst);
881510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
891510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   return inst;
901510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
911510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
921510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
931510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_instruction *
941510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
951510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
961510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   return emit(opcode, dst, src0, src1, src_reg());
971510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
981510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
991510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_instruction *
1001510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
1011510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
1021510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   assert(dst.writemask != 0);
1031510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   return emit(opcode, dst, src0, src_reg(), src_reg());
1041510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
1051510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1061510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_instruction *
1071510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit(enum opcode opcode)
1081510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
1091510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
1101510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
1111510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1121510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
1131510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
114169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org{
1151510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   static enum opcode dot_opcodes[] = {
1161510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
117af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org   };
1181510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1191510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   emit(dot_opcodes[elements - 2], dst, src0, src1);
120169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org}
1211510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1221510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
123af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.orgvec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
1241510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
1251510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   /* The gen6 math instruction ignores the source modifiers --
1261510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org    * swizzle, abs, negate, and at least some parts of the register
127169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org    * region description.
1281510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org    */
1291510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
1301510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
131af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org
1321510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   if (dst.writemask != WRITEMASK_XYZW) {
1331510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      /* The gen6 math instruction must be align1, so we can't do
1341510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org       * writemasks.
135169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org       */
1361510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
1371510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1381510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      emit(opcode, temp_dst, temp_src);
139af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org
1401510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
1411510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   } else {
1421510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      emit(opcode, dst, temp_src);
143169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   }
1441510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
1451510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1461510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
147af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.orgvec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
1481510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
1491510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   vec4_instruction *inst = emit(opcode, dst, src);
1501510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   inst->base_mrf = 1;
151169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   inst->mlen = 1;
1521510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
1531510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1541510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
155af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.orgvec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
1561510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
1571510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   switch (opcode) {
1581510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case SHADER_OPCODE_RCP:
159169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   case SHADER_OPCODE_RSQ:
1601510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case SHADER_OPCODE_SQRT:
1611510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case SHADER_OPCODE_EXP2:
1621510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case SHADER_OPCODE_LOG2:
1631510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case SHADER_OPCODE_SIN:
164af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org   case SHADER_OPCODE_COS:
1651510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      break;
1661510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   default:
1671510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      assert(!"not reached: bad math opcode");
168169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org      return;
1691510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
1701510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1711510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   if (intel->gen >= 6) {
1721510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return emit_math1_gen6(opcode, dst, src);
173af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org   } else {
1741510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return emit_math1_gen4(opcode, dst, src);
1751510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
1761510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
177169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org
1781510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
1791510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit_math2_gen6(enum opcode opcode,
1801510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org			      dst_reg dst, src_reg src0, src_reg src1)
1811510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
182af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org   src_reg expanded;
1831510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1841510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   /* The gen6 math instruction ignores the source modifiers --
1851510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org    * swizzle, abs, negate, and at least some parts of the register
186169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org    * region description.  Move the sources to temporaries to make it
1871510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org    * generally work.
1881510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org    */
1891510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1901510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   expanded = src_reg(this, glsl_type::vec4_type);
191af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
1921510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   src0 = expanded;
1931510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
1941510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   expanded = src_reg(this, glsl_type::vec4_type);
1951510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
1961510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   src1 = expanded;
1971510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
198169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   if (dst.writemask != WRITEMASK_XYZW) {
1991510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      /* The gen6 math instruction must be align1, so we can't do
2001510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org       * writemasks.
2011510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org       */
2021510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
2031510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2041510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      emit(opcode, temp_dst, src0, src1);
205af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org
2061510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
2071510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   } else {
2081510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      emit(opcode, dst, src0, src1);
2091510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
210169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org}
2111510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2121510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
2131510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit_math2_gen4(enum opcode opcode,
2141510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org			      dst_reg dst, src_reg src0, src_reg src1)
2151510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
2161510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   vec4_instruction *inst = emit(opcode, dst, src0, src1);
217af9cfcbed5daf6e636e189bce451c6fafdbb127dmachenbach@chromium.org   inst->base_mrf = 1;
2181510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   inst->mlen = 2;
2191510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
2201510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2211510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
2221510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::emit_math(enum opcode opcode,
2231510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org			dst_reg dst, src_reg src0, src_reg src1)
2241510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
2251510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   assert(opcode == SHADER_OPCODE_POW);
2261510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2271510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   if (intel->gen >= 6) {
2281510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return emit_math2_gen6(opcode, dst, src0, src1);
2291510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   } else {
2301510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return emit_math2_gen4(opcode, dst, src0, src1);
2311510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
2321510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
2331510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2341510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
2351510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::visit_instructions(const exec_list *list)
2361510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
2371510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   foreach_list(node, list) {
2381510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      ir_instruction *ir = (ir_instruction *)node;
2391510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2401510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      base_ir = ir;
2411510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      ir->accept(this);
2421510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
2431510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
2441510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2451510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2461510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgstatic int
2471510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgtype_size(const struct glsl_type *type)
2481510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
2491510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   unsigned int i;
2501510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   int size;
2511510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2521510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   switch (type->base_type) {
2531510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_UINT:
2541510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_INT:
2551510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_FLOAT:
2561510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_BOOL:
2571510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      if (type->is_matrix()) {
2581510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 return type->matrix_columns;
2591510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      } else {
2601510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 /* Regardless of size of vector, it gets a vec4. This is bad
2611510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	  * packing for things like floats, but otherwise arrays become a
2621510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	  * mess.  Hopefully a later pass over the code can pack scalars
2631510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	  * down if appropriate.
2641510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	  */
2651510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 return 1;
2661510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      }
2671510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_ARRAY:
2681510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      assert(type->length > 0);
2691510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return type_size(type->fields.array) * type->length;
2701510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_STRUCT:
2711510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      size = 0;
2721510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      for (i = 0; i < type->length; i++) {
2731510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 size += type_size(type->fields.structure[i].type);
2741510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      }
2751510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return size;
2761510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_SAMPLER:
2771510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      /* Samplers take up one slot in UNIFORMS[], but they're baked in
2781510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org       * at link time.
2791510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org       */
2801510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return 1;
2811510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   default:
2821510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      assert(0);
2831510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return 0;
2841510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
2851510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
2861510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
2871510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgint
2881510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::virtual_grf_alloc(int size)
2891510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
2901510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   if (virtual_grf_array_size <= virtual_grf_count) {
2911510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      if (virtual_grf_array_size == 0)
2921510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 virtual_grf_array_size = 16;
2931510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      else
2941510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 virtual_grf_array_size *= 2;
2951510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
2961510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org				   virtual_grf_array_size);
2971510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
2981510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   virtual_grf_sizes[virtual_grf_count] = size;
2991510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   return virtual_grf_count++;
3001510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
3011510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
3021510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgsrc_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
3031510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
3041510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   init();
3051510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
3061510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->file = GRF;
3071510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   this->reg = v->virtual_grf_alloc(type_size(type));
3081510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
3091510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   if (type->is_array() || type->is_record()) {
3101510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      this->swizzle = BRW_SWIZZLE_NOOP;
3111510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   } else {
3121510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      this->swizzle = swizzle_for_size(type->vector_elements);
3131510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
31410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
31510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   this->type = brw_type_for_base_type(type);
31610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org}
31710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
31810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.orgdst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
31910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org{
32010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   init();
32110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
32210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   this->file = GRF;
32310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   this->reg = v->virtual_grf_alloc(type_size(type));
32410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
32510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   if (type->is_array() || type->is_record()) {
32610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      this->writemask = WRITEMASK_XYZW;
32710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   } else {
32810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      this->writemask = (1 << type->vector_elements) - 1;
32910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   }
33010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
33110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   this->type = brw_type_for_base_type(type);
33210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org}
33310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
/* Our support for uniforms is piggy-backed on the vertex program's
 * parameter list (vp->Base.Parameters), because that's where the values
 * actually get stored, rather than in some global gl_shader_program
 * uniform store.
 */
33910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.orgint
34010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.orgvec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
34110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org{
34210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   unsigned int offset = 0;
34310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
34410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
34510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   if (type->is_matrix()) {
34610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
34710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org							type->vector_elements,
34810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org							1);
34910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
35010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      for (unsigned int i = 0; i < type->matrix_columns; i++) {
35110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 offset += setup_uniform_values(loc + offset, column);
35210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      }
35310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
35410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      return offset;
35510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   }
35610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
35710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   switch (type->base_type) {
35810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   case GLSL_TYPE_FLOAT:
35910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   case GLSL_TYPE_UINT:
36010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   case GLSL_TYPE_INT:
36110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org   case GLSL_TYPE_BOOL:
36210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      for (unsigned int i = 0; i < type->vector_elements; i++) {
36310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 int slot = this->uniforms * 4 + i;
36410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 switch (type->base_type) {
36510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 case GLSL_TYPE_FLOAT:
36610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
36710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    break;
36810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 case GLSL_TYPE_UINT:
36910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
37010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    break;
37110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 case GLSL_TYPE_INT:
37210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
37310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    break;
37410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 case GLSL_TYPE_BOOL:
37510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
37610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    break;
37710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 default:
37810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    assert(!"not reached");
37910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
38010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    break;
38110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 }
38210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 c->prog_data.param[slot] = &values[i];
38310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      }
38410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
38510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      for (unsigned int i = type->vector_elements; i < 4; i++) {
38610480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 c->prog_data.param_convert[this->uniforms * 4 + i] =
38710480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	    PARAM_CONVERT_ZERO;
38810480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
38910480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      }
39010480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
39110480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      this->uniform_size[this->uniforms] = type->vector_elements;
39210480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      this->uniforms++;
39310480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
39410480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org      return 1;
39510480471c0db59c51c15e57d2a3489551d61b273jkummerow@chromium.org
3961510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_STRUCT:
3971510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      for (unsigned int i = 0; i < type->length; i++) {
3981510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org	 offset += setup_uniform_values(loc + offset,
3991510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org					type->fields.structure[i].type);
400169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org      }
4011510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return offset;
4021510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
403169691d93a961c8b511f8ac8fd8ee33d081ca10fdanno@chromium.org   case GLSL_TYPE_ARRAY:
404fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      for (unsigned int i = 0; i < type->length; i++) {
405fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	 offset += setup_uniform_values(loc + offset, type->fields.array);
406fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      }
407fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      return offset;
408fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org
4091510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   case GLSL_TYPE_SAMPLER:
4101510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      /* The sampler takes up a slot, but we don't use any values from it. */
4111510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return 1;
4121510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
4131510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   default:
4141510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      assert(!"not reached");
4151510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org      return 0;
4161510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   }
4171510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org}
4181510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
4191510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org/* Our support for builtin uniforms is even scarier than non-builtin.
4201510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org * It sits on top of the PROG_STATE_VAR parameters that are
4211510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org * automatically updated from GL context state.
4221510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org */
4231510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvoid
4241510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgvec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
4251510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org{
4261510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   const ir_state_slot *const slots = ir->state_slots;
4271510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org   assert(ir->state_slots != NULL);
4281510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org
429fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
430fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      /* This state reference has already been setup by ir_to_mesa,
431fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org       * but we'll get the same index back here.  We can reference
432fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org       * ParameterValues directly, since unlike brw_fs.cpp, we never
433fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org       * add new state references during compile.
434fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org       */
435fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
436fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org					    (gl_state_index *)slots[i].tokens);
437fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
438fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org
439fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      this->uniform_size[this->uniforms] = 0;
440fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      /* Add each of the unique swizzled channels of the element.
441fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org       * This will end up matching the size of the glsl_type of this field.
442fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org       */
443fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      int last_swiz = -1;
444fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      for (unsigned int j = 0; j < 4; j++) {
445fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	 int swiz = GET_SWZ(slots[i].swizzle, j);
446fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	 last_swiz = swiz;
447fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org
448fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
449fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
450fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	 if (swiz <= last_swiz)
451fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org	    this->uniform_size[this->uniforms]++;
452fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      }
453fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org      this->uniforms++;
454fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org   }
455fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org}
456fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org
457fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.orgdst_reg *
458fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.orgvec4_visitor::variable_storage(ir_variable *var)
459fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org{
460fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org   return (dst_reg *)hash_table_find(this->variable_ht, var);
461fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org}
462
463void
464vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
465{
466   ir_expression *expr = ir->as_expression();
467
468   if (expr) {
469      src_reg op[2];
470      vec4_instruction *inst;
471
472      assert(expr->get_num_operands() <= 2);
473      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
474	 assert(expr->operands[i]->type->is_scalar());
475
476	 expr->operands[i]->accept(this);
477	 op[i] = this->result;
478      }
479
480      switch (expr->operation) {
481      case ir_unop_logic_not:
482	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
483	 inst->conditional_mod = BRW_CONDITIONAL_Z;
484	 break;
485
486      case ir_binop_logic_xor:
487	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
488	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
489	 break;
490
491      case ir_binop_logic_or:
492	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
493	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
494	 break;
495
496      case ir_binop_logic_and:
497	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
498	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
499	 break;
500
501      case ir_unop_f2b:
502	 if (intel->gen >= 6) {
503	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
504	 } else {
505	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
506	 }
507	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
508	 break;
509
510      case ir_unop_i2b:
511	 if (intel->gen >= 6) {
512	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
513	 } else {
514	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
515	 }
516	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
517	 break;
518
519      case ir_binop_greater:
520      case ir_binop_gequal:
521      case ir_binop_less:
522      case ir_binop_lequal:
523      case ir_binop_equal:
524      case ir_binop_all_equal:
525      case ir_binop_nequal:
526      case ir_binop_any_nequal:
527	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
528	 inst->conditional_mod =
529	    brw_conditional_for_comparison(expr->operation);
530	 break;
531
532      default:
533	 assert(!"not reached");
534	 break;
535      }
536      return;
537   }
538
539   ir->accept(this);
540
541   if (intel->gen >= 6) {
542      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
543			       this->result, src_reg(1));
544      inst->conditional_mod = BRW_CONDITIONAL_NZ;
545   } else {
546      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
547      inst->conditional_mod = BRW_CONDITIONAL_NZ;
548   }
549}
550
551/**
552 * Emit a gen6 IF statement with the comparison folded into the IF
553 * instruction.
554 */
555void
556vec4_visitor::emit_if_gen6(ir_if *ir)
557{
558   ir_expression *expr = ir->condition->as_expression();
559
560   if (expr) {
561      src_reg op[2];
562      vec4_instruction *inst;
563      dst_reg temp;
564
565      assert(expr->get_num_operands() <= 2);
566      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
567	 expr->operands[i]->accept(this);
568	 op[i] = this->result;
569      }
570
571      switch (expr->operation) {
572      case ir_unop_logic_not:
573	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
574	 inst->conditional_mod = BRW_CONDITIONAL_Z;
575	 return;
576
577      case ir_binop_logic_xor:
578	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
579	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
580	 return;
581
582      case ir_binop_logic_or:
583	 temp = dst_reg(this, glsl_type::bool_type);
584	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
585	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
586	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
587	 return;
588
589      case ir_binop_logic_and:
590	 temp = dst_reg(this, glsl_type::bool_type);
591	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
592	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
593	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
594	 return;
595
596      case ir_unop_f2b:
597	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
598	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
599	 return;
600
601      case ir_unop_i2b:
602	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
603	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
604	 return;
605
606      case ir_binop_greater:
607      case ir_binop_gequal:
608      case ir_binop_less:
609      case ir_binop_lequal:
610      case ir_binop_equal:
611      case ir_binop_nequal:
612	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
613	 inst->conditional_mod =
614	    brw_conditional_for_comparison(expr->operation);
615	 return;
616
617      case ir_binop_all_equal:
618	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
619	 inst->conditional_mod = BRW_CONDITIONAL_Z;
620
621	 inst = emit(BRW_OPCODE_IF);
622	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
623	 return;
624
625      case ir_binop_any_nequal:
626	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
627	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
628
629	 inst = emit(BRW_OPCODE_IF);
630	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
631	 return;
632
633      case ir_unop_any:
634	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
635	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
636
637	 inst = emit(BRW_OPCODE_IF);
638	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
639	 return;
640
641      default:
642	 assert(!"not reached");
643	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
644	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
645	 return;
646      }
647      return;
648   }
649
650   ir->condition->accept(this);
651
652   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
653			    this->result, src_reg(0));
654   inst->conditional_mod = BRW_CONDITIONAL_NZ;
655}
656
657void
658vec4_visitor::visit(ir_variable *ir)
659{
660   dst_reg *reg = NULL;
661
662   if (variable_storage(ir))
663      return;
664
665   switch (ir->mode) {
666   case ir_var_in:
667      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
668      break;
669
670   case ir_var_out:
671      reg = new(mem_ctx) dst_reg(this, ir->type);
672
673      for (int i = 0; i < type_size(ir->type); i++) {
674	 output_reg[ir->location + i] = *reg;
675	 output_reg[ir->location + i].reg_offset = i;
676	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
677      }
678      break;
679
680   case ir_var_auto:
681   case ir_var_temporary:
682      reg = new(mem_ctx) dst_reg(this, ir->type);
683      break;
684
685   case ir_var_uniform:
686      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
687
688      if (!strncmp(ir->name, "gl_", 3)) {
689	 setup_builtin_uniform_values(ir);
690      } else {
691	 setup_uniform_values(ir->location, ir->type);
692      }
693      break;
694
695   default:
696      assert(!"not reached");
697   }
698
699   reg->type = brw_type_for_base_type(ir->type);
700   hash_table_insert(this->variable_ht, reg, ir);
701}
702
703void
704vec4_visitor::visit(ir_loop *ir)
705{
706   dst_reg counter;
707
708   /* We don't want debugging output to print the whole body of the
709    * loop as the annotation.
710    */
711   this->base_ir = NULL;
712
713   if (ir->counter != NULL) {
714      this->base_ir = ir->counter;
715      ir->counter->accept(this);
716      counter = *(variable_storage(ir->counter));
717
718      if (ir->from != NULL) {
719	 this->base_ir = ir->from;
720	 ir->from->accept(this);
721
722	 emit(BRW_OPCODE_MOV, counter, this->result);
723      }
724   }
725
726   emit(BRW_OPCODE_DO);
727
728   if (ir->to) {
729      this->base_ir = ir->to;
730      ir->to->accept(this);
731
732      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
733				    src_reg(counter), this->result);
734      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
735
736      inst = emit(BRW_OPCODE_BREAK);
737      inst->predicate = BRW_PREDICATE_NORMAL;
738   }
739
740   visit_instructions(&ir->body_instructions);
741
742
743   if (ir->increment) {
744      this->base_ir = ir->increment;
745      ir->increment->accept(this);
746      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
747   }
748
749   emit(BRW_OPCODE_WHILE);
750}
751
752void
753vec4_visitor::visit(ir_loop_jump *ir)
754{
755   switch (ir->mode) {
756   case ir_loop_jump::jump_break:
757      emit(BRW_OPCODE_BREAK);
758      break;
759   case ir_loop_jump::jump_continue:
760      emit(BRW_OPCODE_CONTINUE);
761      break;
762   }
763}
764
765
766void
767vec4_visitor::visit(ir_function_signature *ir)
768{
769   assert(0);
770   (void)ir;
771}
772
773void
774vec4_visitor::visit(ir_function *ir)
775{
776   /* Ignore function bodies other than main() -- we shouldn't see calls to
777    * them since they should all be inlined.
778    */
779   if (strcmp(ir->name, "main") == 0) {
780      const ir_function_signature *sig;
781      exec_list empty;
782
783      sig = ir->matching_signature(&empty);
784
785      assert(sig);
786
787      visit_instructions(&sig->body);
788   }
789}
790
791GLboolean
792vec4_visitor::try_emit_sat(ir_expression *ir)
793{
794   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
795   if (!sat_src)
796      return false;
797
798   sat_src->accept(this);
799   src_reg src = this->result;
800
801   this->result = src_reg(this, ir->type);
802   vec4_instruction *inst;
803   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
804   inst->saturate = true;
805
806   return true;
807}
808
809void
810vec4_visitor::emit_bool_comparison(unsigned int op,
811				 dst_reg dst, src_reg src0, src_reg src1)
812{
813   /* original gen4 does destination conversion before comparison. */
814   if (intel->gen < 5)
815      dst.type = src0.type;
816
817   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
818   inst->conditional_mod = brw_conditional_for_comparison(op);
819
820   dst.type = BRW_REGISTER_TYPE_D;
821   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
822}
823
824void
825vec4_visitor::visit(ir_expression *ir)
826{
827   unsigned int operand;
828   src_reg op[Elements(ir->operands)];
829   src_reg result_src;
830   dst_reg result_dst;
831   vec4_instruction *inst;
832
833   if (try_emit_sat(ir))
834      return;
835
836   for (operand = 0; operand < ir->get_num_operands(); operand++) {
837      this->result.file = BAD_FILE;
838      ir->operands[operand]->accept(this);
839      if (this->result.file == BAD_FILE) {
840	 printf("Failed to get tree for expression operand:\n");
841	 ir->operands[operand]->print();
842	 exit(1);
843      }
844      op[operand] = this->result;
845
846      /* Matrix expression operands should have been broken down to vector
847       * operations already.
848       */
849      assert(!ir->operands[operand]->type->is_matrix());
850   }
851
852   int vector_elements = ir->operands[0]->type->vector_elements;
853   if (ir->operands[1]) {
854      vector_elements = MAX2(vector_elements,
855			     ir->operands[1]->type->vector_elements);
856   }
857
858   this->result.file = BAD_FILE;
859
860   /* Storage for our result.  Ideally for an assignment we'd be using
861    * the actual storage for the result here, instead.
862    */
863   result_src = src_reg(this, ir->type);
864   /* convenience for the emit functions below. */
865   result_dst = dst_reg(result_src);
866   /* If nothing special happens, this is the result. */
867   this->result = result_src;
868   /* Limit writes to the channels that will be used by result_src later.
869    * This does limit this temp's use as a temporary for multi-instruction
870    * sequences.
871    */
872   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
873
874   switch (ir->operation) {
875   case ir_unop_logic_not:
876      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
877       * ones complement of the whole register, not just bit 0.
878       */
879      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
880      break;
881   case ir_unop_neg:
882      op[0].negate = !op[0].negate;
883      this->result = op[0];
884      break;
885   case ir_unop_abs:
886      op[0].abs = true;
887      op[0].negate = false;
888      this->result = op[0];
889      break;
890
891   case ir_unop_sign:
892      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
893
894      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
895      inst->conditional_mod = BRW_CONDITIONAL_G;
896      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
897      inst->predicate = BRW_PREDICATE_NORMAL;
898
899      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
900      inst->conditional_mod = BRW_CONDITIONAL_L;
901      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
902      inst->predicate = BRW_PREDICATE_NORMAL;
903
904      break;
905
906   case ir_unop_rcp:
907      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
908      break;
909
910   case ir_unop_exp2:
911      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
912      break;
913   case ir_unop_log2:
914      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
915      break;
916   case ir_unop_exp:
917   case ir_unop_log:
918      assert(!"not reached: should be handled by ir_explog_to_explog2");
919      break;
920   case ir_unop_sin:
921   case ir_unop_sin_reduced:
922      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
923      break;
924   case ir_unop_cos:
925   case ir_unop_cos_reduced:
926      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
927      break;
928
929   case ir_unop_dFdx:
930   case ir_unop_dFdy:
931      assert(!"derivatives not valid in vertex shader");
932      break;
933
934   case ir_unop_noise:
935      assert(!"not reached: should be handled by lower_noise");
936      break;
937
938   case ir_binop_add:
939      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
940      break;
941   case ir_binop_sub:
942      assert(!"not reached: should be handled by ir_sub_to_add_neg");
943      break;
944
945   case ir_binop_mul:
946      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
947      break;
948   case ir_binop_div:
949      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
950   case ir_binop_mod:
951      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
952      break;
953
954   case ir_binop_less:
955   case ir_binop_greater:
956   case ir_binop_lequal:
957   case ir_binop_gequal:
958   case ir_binop_equal:
959   case ir_binop_nequal: {
960      dst_reg temp = result_dst;
961      /* original gen4 does implicit conversion before comparison. */
962      if (intel->gen < 5)
963	 temp.type = op[0].type;
964
965      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
966      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
967      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
968      break;
969   }
970
971   case ir_binop_all_equal:
972      /* "==" operator producing a scalar boolean. */
973      if (ir->operands[0]->type->is_vector() ||
974	  ir->operands[1]->type->is_vector()) {
975	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
976	 inst->conditional_mod = BRW_CONDITIONAL_Z;
977
978	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
979	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
980	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
981      } else {
982	 dst_reg temp = result_dst;
983	 /* original gen4 does implicit conversion before comparison. */
984	 if (intel->gen < 5)
985	    temp.type = op[0].type;
986
987	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
988	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
989	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
990      }
991      break;
992   case ir_binop_any_nequal:
993      /* "!=" operator producing a scalar boolean. */
994      if (ir->operands[0]->type->is_vector() ||
995	  ir->operands[1]->type->is_vector()) {
996	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
997	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
998
999	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1000	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1001	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1002      } else {
1003	 dst_reg temp = result_dst;
1004	 /* original gen4 does implicit conversion before comparison. */
1005	 if (intel->gen < 5)
1006	    temp.type = op[0].type;
1007
1008	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1009	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1010	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
1011      }
1012      break;
1013
1014   case ir_unop_any:
1015      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
1016      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1017
1018      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1019
1020      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1021      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1022      break;
1023
1024   case ir_binop_logic_xor:
1025      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1026      break;
1027
1028   case ir_binop_logic_or:
1029      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1030      break;
1031
1032   case ir_binop_logic_and:
1033      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1034      break;
1035
1036   case ir_binop_dot:
1037      assert(ir->operands[0]->type->is_vector());
1038      assert(ir->operands[0]->type == ir->operands[1]->type);
1039      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1040      break;
1041
1042   case ir_unop_sqrt:
1043      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1044      break;
1045   case ir_unop_rsq:
1046      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1047      break;
1048   case ir_unop_i2f:
1049   case ir_unop_i2u:
1050   case ir_unop_u2i:
1051   case ir_unop_u2f:
1052   case ir_unop_b2f:
1053   case ir_unop_b2i:
1054   case ir_unop_f2i:
1055      emit(BRW_OPCODE_MOV, result_dst, op[0]);
1056      break;
1057   case ir_unop_f2b:
1058   case ir_unop_i2b: {
1059      dst_reg temp = result_dst;
1060      /* original gen4 does implicit conversion before comparison. */
1061      if (intel->gen < 5)
1062	 temp.type = op[0].type;
1063
1064      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
1065      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1066      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
1067      break;
1068   }
1069
1070   case ir_unop_trunc:
1071      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
1072      break;
1073   case ir_unop_ceil:
1074      op[0].negate = !op[0].negate;
1075      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1076      this->result.negate = true;
1077      break;
1078   case ir_unop_floor:
1079      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1080      break;
1081   case ir_unop_fract:
1082      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
1083      break;
1084   case ir_unop_round_even:
1085      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
1086      break;
1087
1088   case ir_binop_min:
1089      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1090      inst->conditional_mod = BRW_CONDITIONAL_L;
1091
1092      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1093      inst->predicate = BRW_PREDICATE_NORMAL;
1094      break;
1095   case ir_binop_max:
1096      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1097      inst->conditional_mod = BRW_CONDITIONAL_G;
1098
1099      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1100      inst->predicate = BRW_PREDICATE_NORMAL;
1101      break;
1102
1103   case ir_binop_pow:
1104      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1105      break;
1106
1107   case ir_unop_bit_not:
1108      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
1109      break;
1110   case ir_binop_bit_and:
1111      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1112      break;
1113   case ir_binop_bit_xor:
1114      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1115      break;
1116   case ir_binop_bit_or:
1117      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1118      break;
1119
1120   case ir_binop_lshift:
1121   case ir_binop_rshift:
1122      assert(!"GLSL 1.30 features unsupported");
1123      break;
1124
1125   case ir_quadop_vector:
1126      assert(!"not reached: should be handled by lower_quadop_vector");
1127      break;
1128   }
1129}
1130
1131
1132void
1133vec4_visitor::visit(ir_swizzle *ir)
1134{
1135   src_reg src;
1136   int i = 0;
1137   int swizzle[4];
1138
1139   /* Note that this is only swizzles in expressions, not those on the left
1140    * hand side of an assignment, which do write masking.  See ir_assignment
1141    * for that.
1142    */
1143
1144   ir->val->accept(this);
1145   src = this->result;
1146   assert(src.file != BAD_FILE);
1147
1148   for (i = 0; i < ir->type->vector_elements; i++) {
1149      switch (i) {
1150      case 0:
1151	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1152	 break;
1153      case 1:
1154	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1155	 break;
1156      case 2:
1157	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1158	 break;
1159      case 3:
1160	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1161	    break;
1162      }
1163   }
1164   for (; i < 4; i++) {
1165      /* Replicate the last channel out. */
1166      swizzle[i] = swizzle[ir->type->vector_elements - 1];
1167   }
1168
1169   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1170
1171   this->result = src;
1172}
1173
1174void
1175vec4_visitor::visit(ir_dereference_variable *ir)
1176{
1177   const struct glsl_type *type = ir->type;
1178   dst_reg *reg = variable_storage(ir->var);
1179
1180   if (!reg) {
1181      fail("Failed to find variable storage for %s\n", ir->var->name);
1182      this->result = src_reg(brw_null_reg());
1183      return;
1184   }
1185
1186   this->result = src_reg(*reg);
1187
1188   if (type->is_scalar() || type->is_vector() || type->is_matrix())
1189      this->result.swizzle = swizzle_for_size(type->vector_elements);
1190}
1191
1192void
1193vec4_visitor::visit(ir_dereference_array *ir)
1194{
1195   ir_constant *constant_index;
1196   src_reg src;
1197   int element_size = type_size(ir->type);
1198
1199   constant_index = ir->array_index->constant_expression_value();
1200
1201   ir->array->accept(this);
1202   src = this->result;
1203
1204   if (constant_index) {
1205      src.reg_offset += constant_index->value.i[0] * element_size;
1206   } else {
1207      /* Variable index array dereference.  It eats the "vec4" of the
1208       * base of the array and an index that offsets the Mesa register
1209       * index.
1210       */
1211      ir->array_index->accept(this);
1212
1213      src_reg index_reg;
1214
1215      if (element_size == 1) {
1216	 index_reg = this->result;
1217      } else {
1218	 index_reg = src_reg(this, glsl_type::int_type);
1219
1220	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
1221	      this->result, src_reg(element_size));
1222      }
1223
1224      if (src.reladdr) {
1225	 src_reg temp = src_reg(this, glsl_type::int_type);
1226
1227	 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
1228
1229	 index_reg = temp;
1230      }
1231
1232      src.reladdr = ralloc(mem_ctx, src_reg);
1233      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1234   }
1235
1236   /* If the type is smaller than a vec4, replicate the last channel out. */
1237   if (ir->type->is_scalar() || ir->type->is_vector())
1238      src.swizzle = swizzle_for_size(ir->type->vector_elements);
1239   else
1240      src.swizzle = BRW_SWIZZLE_NOOP;
1241   src.type = brw_type_for_base_type(ir->type);
1242
1243   this->result = src;
1244}
1245
1246void
1247vec4_visitor::visit(ir_dereference_record *ir)
1248{
1249   unsigned int i;
1250   const glsl_type *struct_type = ir->record->type;
1251   int offset = 0;
1252
1253   ir->record->accept(this);
1254
1255   for (i = 0; i < struct_type->length; i++) {
1256      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1257	 break;
1258      offset += type_size(struct_type->fields.structure[i].type);
1259   }
1260
1261   /* If the type is smaller than a vec4, replicate the last channel out. */
1262   if (ir->type->is_scalar() || ir->type->is_vector())
1263      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1264   else
1265      this->result.swizzle = BRW_SWIZZLE_NOOP;
1266   this->result.type = brw_type_for_base_type(ir->type);
1267
1268   this->result.reg_offset += offset;
1269}
1270
1271/**
1272 * We want to be careful in assignment setup to hit the actual storage
1273 * instead of potentially using a temporary like we might with the
1274 * ir_dereference handler.
1275 */
1276static dst_reg
1277get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1278{
1279   /* The LHS must be a dereference.  If the LHS is a variable indexed array
1280    * access of a vector, it must be separated into a series conditional moves
1281    * before reaching this point (see ir_vec_index_to_cond_assign).
1282    */
1283   assert(ir->as_dereference());
1284   ir_dereference_array *deref_array = ir->as_dereference_array();
1285   if (deref_array) {
1286      assert(!deref_array->array->type->is_vector());
1287   }
1288
1289   /* Use the rvalue deref handler for the most part.  We'll ignore
1290    * swizzles in it and write swizzles using writemask, though.
1291    */
1292   ir->accept(v);
1293   return dst_reg(v->result);
1294}
1295
1296void
1297vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1298			      const struct glsl_type *type, bool predicated)
1299{
1300   if (type->base_type == GLSL_TYPE_STRUCT) {
1301      for (unsigned int i = 0; i < type->length; i++) {
1302	 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1303      }
1304      return;
1305   }
1306
1307   if (type->is_array()) {
1308      for (unsigned int i = 0; i < type->length; i++) {
1309	 emit_block_move(dst, src, type->fields.array, predicated);
1310      }
1311      return;
1312   }
1313
1314   if (type->is_matrix()) {
1315      const struct glsl_type *vec_type;
1316
1317      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1318					 type->vector_elements, 1);
1319
1320      for (int i = 0; i < type->matrix_columns; i++) {
1321	 emit_block_move(dst, src, vec_type, predicated);
1322      }
1323      return;
1324   }
1325
1326   assert(type->is_scalar() || type->is_vector());
1327
1328   dst->type = brw_type_for_base_type(type);
1329   src->type = dst->type;
1330
1331   dst->writemask = (1 << type->vector_elements) - 1;
1332
1333   /* Do we need to worry about swizzling a swizzle? */
1334   assert(src->swizzle = BRW_SWIZZLE_NOOP);
1335   src->swizzle = swizzle_for_size(type->vector_elements);
1336
1337   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
1338   if (predicated)
1339      inst->predicate = BRW_PREDICATE_NORMAL;
1340
1341   dst->reg_offset++;
1342   src->reg_offset++;
1343}
1344
1345
1346/* If the RHS processing resulted in an instruction generating a
1347 * temporary value, and it would be easy to rewrite the instruction to
1348 * generate its result right into the LHS instead, do so.  This ends
1349 * up reliably removing instructions where it can be tricky to do so
1350 * later without real UD chain information.
1351 */
1352bool
1353vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1354				     dst_reg dst,
1355				     src_reg src,
1356				     vec4_instruction *pre_rhs_inst,
1357				     vec4_instruction *last_rhs_inst)
1358{
1359   /* This could be supported, but it would take more smarts. */
1360   if (ir->condition)
1361      return false;
1362
1363   if (pre_rhs_inst == last_rhs_inst)
1364      return false; /* No instructions generated to work with. */
1365
1366   /* Make sure the last instruction generated our source reg. */
1367   if (src.file != GRF ||
1368       src.file != last_rhs_inst->dst.file ||
1369       src.reg != last_rhs_inst->dst.reg ||
1370       src.reg_offset != last_rhs_inst->dst.reg_offset ||
1371       src.reladdr ||
1372       src.abs ||
1373       src.negate ||
1374       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1375      return false;
1376
1377   /* Check that that last instruction fully initialized the channels
1378    * we want to use, in the order we want to use them.  We could
1379    * potentially reswizzle the operands of many instructions so that
1380    * we could handle out of order channels, but don't yet.
1381    */
1382   for (int i = 0; i < 4; i++) {
1383      if (dst.writemask & (1 << i)) {
1384	 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1385	    return false;
1386
1387	 if (BRW_GET_SWZ(src.swizzle, i) != i)
1388	    return false;
1389      }
1390   }
1391
1392   /* Success!  Rewrite the instruction. */
1393   last_rhs_inst->dst.file = dst.file;
1394   last_rhs_inst->dst.reg = dst.reg;
1395   last_rhs_inst->dst.reg_offset = dst.reg_offset;
1396   last_rhs_inst->dst.reladdr = dst.reladdr;
1397   last_rhs_inst->dst.writemask &= dst.writemask;
1398
1399   return true;
1400}
1401
1402void
1403vec4_visitor::visit(ir_assignment *ir)
1404{
1405   dst_reg dst = get_assignment_lhs(ir->lhs, this);
1406
1407   if (!ir->lhs->type->is_scalar() &&
1408       !ir->lhs->type->is_vector()) {
1409      ir->rhs->accept(this);
1410      src_reg src = this->result;
1411
1412      if (ir->condition) {
1413	 emit_bool_to_cond_code(ir->condition);
1414      }
1415
1416      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
1417      return;
1418   }
1419
1420   /* Now we're down to just a scalar/vector with writemasks. */
1421   int i;
1422
1423   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1424   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1425
1426   ir->rhs->accept(this);
1427
1428   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1429
1430   src_reg src = this->result;
1431
1432   int swizzles[4];
1433   int first_enabled_chan = 0;
1434   int src_chan = 0;
1435
1436   assert(ir->lhs->type->is_vector() ||
1437	  ir->lhs->type->is_scalar());
1438   dst.writemask = ir->write_mask;
1439
1440   for (int i = 0; i < 4; i++) {
1441      if (dst.writemask & (1 << i)) {
1442	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1443	 break;
1444      }
1445   }
1446
1447   /* Swizzle a small RHS vector into the channels being written.
1448    *
1449    * glsl ir treats write_mask as dictating how many channels are
1450    * present on the RHS while in our instructions we need to make
1451    * those channels appear in the slots of the vec4 they're written to.
1452    */
1453   for (int i = 0; i < 4; i++) {
1454      if (dst.writemask & (1 << i))
1455	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1456      else
1457	 swizzles[i] = first_enabled_chan;
1458   }
1459   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1460			      swizzles[2], swizzles[3]);
1461
1462   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1463      return;
1464   }
1465
1466   if (ir->condition) {
1467      emit_bool_to_cond_code(ir->condition);
1468   }
1469
1470   for (i = 0; i < type_size(ir->lhs->type); i++) {
1471      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1472
1473      if (ir->condition)
1474	 inst->predicate = BRW_PREDICATE_NORMAL;
1475
1476      dst.reg_offset++;
1477      src.reg_offset++;
1478   }
1479}
1480
1481void
1482vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1483{
1484   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1485      foreach_list(node, &ir->components) {
1486	 ir_constant *field_value = (ir_constant *)node;
1487
1488	 emit_constant_values(dst, field_value);
1489      }
1490      return;
1491   }
1492
1493   if (ir->type->is_array()) {
1494      for (unsigned int i = 0; i < ir->type->length; i++) {
1495	 emit_constant_values(dst, ir->array_elements[i]);
1496      }
1497      return;
1498   }
1499
1500   if (ir->type->is_matrix()) {
1501      for (int i = 0; i < ir->type->matrix_columns; i++) {
1502	 for (int j = 0; j < ir->type->vector_elements; j++) {
1503	    dst->writemask = 1 << j;
1504	    dst->type = BRW_REGISTER_TYPE_F;
1505
1506	    emit(BRW_OPCODE_MOV, *dst,
1507		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1508	 }
1509	 dst->reg_offset++;
1510      }
1511      return;
1512   }
1513
1514   for (int i = 0; i < ir->type->vector_elements; i++) {
1515      dst->writemask = 1 << i;
1516      dst->type = brw_type_for_base_type(ir->type);
1517
1518      switch (ir->type->base_type) {
1519      case GLSL_TYPE_FLOAT:
1520	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
1521	 break;
1522      case GLSL_TYPE_INT:
1523	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
1524	 break;
1525      case GLSL_TYPE_UINT:
1526	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
1527	 break;
1528      case GLSL_TYPE_BOOL:
1529	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
1530	 break;
1531      default:
1532	 assert(!"Non-float/uint/int/bool constant");
1533	 break;
1534      }
1535   }
1536   dst->reg_offset++;
1537}
1538
1539void
1540vec4_visitor::visit(ir_constant *ir)
1541{
1542   dst_reg dst = dst_reg(this, ir->type);
1543   this->result = src_reg(dst);
1544
1545   emit_constant_values(&dst, ir);
1546}
1547
1548void
1549vec4_visitor::visit(ir_call *ir)
1550{
1551   assert(!"not reached");
1552}
1553
1554void
1555vec4_visitor::visit(ir_texture *ir)
1556{
1557   assert(!"not reached");
1558}
1559
1560void
1561vec4_visitor::visit(ir_return *ir)
1562{
1563   assert(!"not reached");
1564}
1565
1566void
1567vec4_visitor::visit(ir_discard *ir)
1568{
1569   assert(!"not reached");
1570}
1571
1572void
1573vec4_visitor::visit(ir_if *ir)
1574{
1575   /* Don't point the annotation at the if statement, because then it plus
1576    * the then and else blocks get printed.
1577    */
1578   this->base_ir = ir->condition;
1579
1580   if (intel->gen == 6) {
1581      emit_if_gen6(ir);
1582   } else {
1583      emit_bool_to_cond_code(ir->condition);
1584      vec4_instruction *inst = emit(BRW_OPCODE_IF);
1585      inst->predicate = BRW_PREDICATE_NORMAL;
1586   }
1587
1588   visit_instructions(&ir->then_instructions);
1589
1590   if (!ir->else_instructions.is_empty()) {
1591      this->base_ir = ir->condition;
1592      emit(BRW_OPCODE_ELSE);
1593
1594      visit_instructions(&ir->else_instructions);
1595   }
1596
1597   this->base_ir = ir->condition;
1598   emit(BRW_OPCODE_ENDIF);
1599}
1600
1601int
1602vec4_visitor::emit_vue_header_gen4(int header_mrf)
1603{
1604   /* Get the position */
1605   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1606
1607   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1608   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1609
1610   current_annotation = "NDC";
1611   dst_reg ndc_w = ndc;
1612   ndc_w.writemask = WRITEMASK_W;
1613   src_reg pos_w = pos;
1614   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1615   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1616
1617   dst_reg ndc_xyz = ndc;
1618   ndc_xyz.writemask = WRITEMASK_XYZ;
1619
1620   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1621
1622   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1623       c->key.nr_userclip || brw->has_negative_rhw_bug) {
1624      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1625      GLuint i;
1626
1627      emit(BRW_OPCODE_MOV, header1, 0u);
1628
1629      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1630	 assert(!"finishme: psiz");
1631	 src_reg psiz;
1632
1633	 header1.writemask = WRITEMASK_W;
1634	 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1635	 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1636      }
1637
1638      for (i = 0; i < c->key.nr_userclip; i++) {
1639	 vec4_instruction *inst;
1640
1641	 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1642		     pos, src_reg(c->userplane[i]));
1643	 inst->conditional_mod = BRW_CONDITIONAL_L;
1644
1645	 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1646	 inst->predicate = BRW_PREDICATE_NORMAL;
1647      }
1648
1649      /* i965 clipping workaround:
1650       * 1) Test for -ve rhw
1651       * 2) If set,
1652       *      set ndc = (0,0,0,0)
1653       *      set ucp[6] = 1
1654       *
1655       * Later, clipping will detect ucp[6] and ensure the primitive is
1656       * clipped against all fixed planes.
1657       */
1658      if (brw->has_negative_rhw_bug) {
1659#if 0
1660	 /* FINISHME */
1661	 brw_CMP(p,
1662		 vec8(brw_null_reg()),
1663		 BRW_CONDITIONAL_L,
1664		 brw_swizzle1(ndc, 3),
1665		 brw_imm_f(0));
1666
1667	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1668	 brw_MOV(p, ndc, brw_imm_f(0));
1669	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1670#endif
1671      }
1672
1673      header1.writemask = WRITEMASK_XYZW;
1674      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1675   } else {
1676      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1677				  BRW_REGISTER_TYPE_UD), 0u);
1678   }
1679
1680   if (intel->gen == 5) {
1681      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1682       * dword 0-3 (m1) of the header is indices, point width, clip flags.
1683       * dword 4-7 (m2) is the ndc position (set above)
1684       * dword 8-11 (m3) of the vertex header is the 4D space position
1685       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1686       * m6 is a pad so that the vertex element data is aligned
1687       * m7 is the first vertex data we fill.
1688       */
1689      current_annotation = "NDC";
1690      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1691
1692      current_annotation = "gl_Position";
1693      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1694
1695      /* user clip distance. */
1696      header_mrf += 2;
1697
1698      /* Pad so that vertex element data is aligned. */
1699      header_mrf++;
1700   } else {
1701      /* There are 8 dwords in VUE header pre-Ironlake:
1702       * dword 0-3 (m1) is indices, point width, clip flags.
1703       * dword 4-7 (m2) is ndc position (set above)
1704       *
1705       * dword 8-11 (m3) is the first vertex data.
1706       */
1707      current_annotation = "NDC";
1708      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1709
1710      current_annotation = "gl_Position";
1711      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1712   }
1713
1714   return header_mrf;
1715}
1716
1717int
1718vec4_visitor::emit_vue_header_gen6(int header_mrf)
1719{
1720   struct brw_reg reg;
1721
1722   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1723    * dword 0-3 (m2) of the header is indices, point width, clip flags.
1724    * dword 4-7 (m3) is the 4D space position
1725    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1726    * enabled.
1727    *
1728    * m4 or 6 is the first vertex element data we fill.
1729    */
1730
1731   current_annotation = "indices, point width, clip flags";
1732   reg = brw_message_reg(header_mrf++);
1733   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1734   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1735      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1736	   src_reg(output_reg[VERT_RESULT_PSIZ]));
1737   }
1738
1739   current_annotation = "gl_Position";
1740   emit(BRW_OPCODE_MOV,
1741	brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1742
1743   current_annotation = "user clip distances";
1744   if (c->key.nr_userclip) {
1745      for (int i = 0; i < c->key.nr_userclip; i++) {
1746	 struct brw_reg m;
1747	 if (i < 4)
1748	    m = brw_message_reg(header_mrf);
1749	 else
1750	    m = brw_message_reg(header_mrf + 1);
1751
1752	 emit(BRW_OPCODE_DP4,
1753	      dst_reg(brw_writemask(m, 1 << (i & 3))),
1754	      src_reg(c->userplane[i]));
1755      }
1756      header_mrf += 2;
1757   }
1758
1759   current_annotation = NULL;
1760
1761   return header_mrf;
1762}
1763
1764static int
1765align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1766{
1767   struct intel_context *intel = &brw->intel;
1768
1769   if (intel->gen >= 6) {
1770      /* URB data written (does not include the message header reg) must
1771       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
1772       * section 5.4.3.2.2: URB_INTERLEAVED.
1773       *
1774       * URB entries are allocated on a multiple of 1024 bits, so an
1775       * extra 128 bits written here to make the end align to 256 is
1776       * no problem.
1777       */
1778      if ((mlen % 2) != 1)
1779	 mlen++;
1780   }
1781
1782   return mlen;
1783}
1784
1785/**
1786 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1787 * complete the VS thread.
1788 *
1789 * The VUE layout is documented in Volume 2a.
1790 */
1791void
1792vec4_visitor::emit_urb_writes()
1793{
1794   /* MRF 0 is reserved for the debugger, so start with message header
1795    * in MRF 1.
1796    */
1797   int base_mrf = 1;
1798   int mrf = base_mrf;
1799   int urb_entry_size;
1800   uint64_t outputs_remaining = c->prog_data.outputs_written;
1801   /* In the process of generating our URB write message contents, we
1802    * may need to unspill a register or load from an array.  Those
1803    * reads would use MRFs 14-15.
1804    */
1805   int max_usable_mrf = 13;
1806
1807   /* FINISHME: edgeflag */
1808
1809   /* First mrf is the g0-based message header containing URB handles and such,
1810    * which is implied in VS_OPCODE_URB_WRITE.
1811    */
1812   mrf++;
1813
1814   if (intel->gen >= 6) {
1815      mrf = emit_vue_header_gen6(mrf);
1816   } else {
1817      mrf = emit_vue_header_gen4(mrf);
1818   }
1819
1820   /* Set up the VUE data for the first URB write */
1821   int attr;
1822   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1823      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1824	 continue;
1825
1826      outputs_remaining &= ~BITFIELD64_BIT(attr);
1827
1828      /* This is set up in the VUE header. */
1829      if (attr == VERT_RESULT_HPOS)
1830	 continue;
1831
1832      /* This is loaded into the VUE header, and thus doesn't occupy
1833       * an attribute slot.
1834       */
1835      if (attr == VERT_RESULT_PSIZ)
1836	 continue;
1837
1838      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
1839				    src_reg(output_reg[attr]));
1840
1841      if ((attr == VERT_RESULT_COL0 ||
1842	   attr == VERT_RESULT_COL1 ||
1843	   attr == VERT_RESULT_BFC0 ||
1844	   attr == VERT_RESULT_BFC1) &&
1845	  c->key.clamp_vertex_color) {
1846	 inst->saturate = true;
1847      }
1848
1849      /* If this was MRF 15, we can't fit anything more into this URB
1850       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
1851       * even-numbered amount of URB write data, which will meet
1852       * gen6's requirements for length alignment.
1853       */
1854      if (mrf > max_usable_mrf) {
1855	 attr++;
1856	 break;
1857      }
1858   }
1859
1860   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1861   inst->base_mrf = base_mrf;
1862   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1863   inst->eot = !outputs_remaining;
1864
1865   urb_entry_size = mrf - base_mrf;
1866
1867   /* Optional second URB write */
1868   if (outputs_remaining) {
1869      mrf = base_mrf + 1;
1870
1871      for (; attr < VERT_RESULT_MAX; attr++) {
1872	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1873	    continue;
1874
1875	 assert(mrf < max_usable_mrf);
1876
1877	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1878      }
1879
1880      inst = emit(VS_OPCODE_URB_WRITE);
1881      inst->base_mrf = base_mrf;
1882      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1883      inst->eot = true;
1884      /* URB destination offset.  In the previous write, we got MRFs
1885       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
1886       * URB row increments, and each of our MRFs is half of one of
1887       * those, since we're doing interleaved writes.
1888       */
1889      inst->offset = (max_usable_mrf - base_mrf) / 2;
1890
1891      urb_entry_size += mrf - base_mrf;
1892   }
1893
1894   if (intel->gen == 6)
1895      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1896   else
1897      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1898}
1899
1900src_reg
1901vec4_visitor::get_scratch_offset(vec4_instruction *inst,
1902				 src_reg *reladdr, int reg_offset)
1903{
1904   /* Because we store the values to scratch interleaved like our
1905    * vertex data, we need to scale the vec4 index by 2.
1906    */
1907   int message_header_scale = 2;
1908
1909   /* Pre-gen6, the message header uses byte offsets instead of vec4
1910    * (16-byte) offset units.
1911    */
1912   if (intel->gen < 6)
1913      message_header_scale *= 16;
1914
1915   if (reladdr) {
1916      src_reg index = src_reg(this, glsl_type::int_type);
1917
1918      vec4_instruction *add = emit(BRW_OPCODE_ADD,
1919				   dst_reg(index),
1920				   *reladdr,
1921				   src_reg(reg_offset));
1922      /* Move our new instruction from the tail to its correct place. */
1923      add->remove();
1924      inst->insert_before(add);
1925
1926      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
1927				   index, src_reg(message_header_scale));
1928      mul->remove();
1929      inst->insert_before(mul);
1930
1931      return index;
1932   } else {
1933      return src_reg(reg_offset * message_header_scale);
1934   }
1935}
1936
1937/**
1938 * Emits an instruction before @inst to load the value named by @orig_src
1939 * from scratch space at @base_offset to @temp.
1940 */
1941void
1942vec4_visitor::emit_scratch_read(vec4_instruction *inst,
1943				dst_reg temp, src_reg orig_src,
1944				int base_offset)
1945{
1946   int reg_offset = base_offset + orig_src.reg_offset;
1947   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
1948
1949   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
1950					      temp, index);
1951
1952   scratch_read_inst->base_mrf = 14;
1953   scratch_read_inst->mlen = 1;
1954   /* Move our instruction from the tail to its correct place. */
1955   scratch_read_inst->remove();
1956   inst->insert_before(scratch_read_inst);
1957}
1958
1959/**
1960 * Emits an instruction after @inst to store the value to be written
1961 * to @orig_dst to scratch space at @base_offset, from @temp.
1962 */
1963void
1964vec4_visitor::emit_scratch_write(vec4_instruction *inst,
1965				 src_reg temp, dst_reg orig_dst,
1966				 int base_offset)
1967{
1968   int reg_offset = base_offset + orig_dst.reg_offset;
1969   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
1970
1971   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
1972				       orig_dst.writemask));
1973   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
1974					       dst, temp, index);
1975   scratch_write_inst->base_mrf = 13;
1976   scratch_write_inst->mlen = 2;
1977   scratch_write_inst->predicate = inst->predicate;
1978   /* Move our instruction from the tail to its correct place. */
1979   scratch_write_inst->remove();
1980   inst->insert_after(scratch_write_inst);
1981}
1982
1983/**
1984 * We can't generally support array access in GRF space, because a
1985 * single instruction's destination can only span 2 contiguous
1986 * registers.  So, we send all GRF arrays that get variable index
1987 * access to scratch space.
1988 */
1989void
1990vec4_visitor::move_grf_array_access_to_scratch()
1991{
1992   int scratch_loc[this->virtual_grf_count];
1993
1994   for (int i = 0; i < this->virtual_grf_count; i++) {
1995      scratch_loc[i] = -1;
1996   }
1997
1998   /* First, calculate the set of virtual GRFs that need to be punted
1999    * to scratch due to having any array access on them, and where in
2000    * scratch.
2001    */
2002   foreach_list(node, &this->instructions) {
2003      vec4_instruction *inst = (vec4_instruction *)node;
2004
2005      if (inst->dst.file == GRF && inst->dst.reladdr &&
2006	  scratch_loc[inst->dst.reg] == -1) {
2007	 scratch_loc[inst->dst.reg] = c->last_scratch;
2008	 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2009      }
2010
2011      for (int i = 0 ; i < 3; i++) {
2012	 src_reg *src = &inst->src[i];
2013
2014	 if (src->file == GRF && src->reladdr &&
2015	     scratch_loc[src->reg] == -1) {
2016	    scratch_loc[src->reg] = c->last_scratch;
2017	    c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2018	 }
2019      }
2020   }
2021
2022   /* Now, for anything that will be accessed through scratch, rewrite
2023    * it to load/store.  Note that this is a _safe list walk, because
2024    * we may generate a new scratch_write instruction after the one
2025    * we're processing.
2026    */
2027   foreach_list_safe(node, &this->instructions) {
2028      vec4_instruction *inst = (vec4_instruction *)node;
2029
2030      /* Set up the annotation tracking for new generated instructions. */
2031      base_ir = inst->ir;
2032      current_annotation = inst->annotation;
2033
2034      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2035	 src_reg temp = src_reg(this, glsl_type::vec4_type);
2036
2037	 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2038
2039	 inst->dst.file = temp.file;
2040	 inst->dst.reg = temp.reg;
2041	 inst->dst.reg_offset = temp.reg_offset;
2042	 inst->dst.reladdr = NULL;
2043      }
2044
2045      for (int i = 0 ; i < 3; i++) {
2046	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2047	    continue;
2048
2049	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2050
2051	 emit_scratch_read(inst, temp, inst->src[i],
2052			   scratch_loc[inst->src[i].reg]);
2053
2054	 inst->src[i].file = temp.file;
2055	 inst->src[i].reg = temp.reg;
2056	 inst->src[i].reg_offset = temp.reg_offset;
2057	 inst->src[i].reladdr = NULL;
2058      }
2059   }
2060}
2061
2062
2063vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2064			   struct gl_shader_program *prog,
2065			   struct brw_shader *shader)
2066{
2067   this->c = c;
2068   this->p = &c->func;
2069   this->brw = p->brw;
2070   this->intel = &brw->intel;
2071   this->ctx = &intel->ctx;
2072   this->prog = prog;
2073   this->shader = shader;
2074
2075   this->mem_ctx = ralloc_context(NULL);
2076   this->failed = false;
2077
2078   this->base_ir = NULL;
2079   this->current_annotation = NULL;
2080
2081   this->c = c;
2082   this->vp = prog->VertexProgram;
2083   this->prog_data = &c->prog_data;
2084
2085   this->variable_ht = hash_table_ctor(0,
2086				       hash_table_pointer_hash,
2087				       hash_table_pointer_compare);
2088
2089   this->virtual_grf_sizes = NULL;
2090   this->virtual_grf_count = 0;
2091   this->virtual_grf_array_size = 0;
2092
2093   this->uniforms = 0;
2094
2095   this->variable_ht = hash_table_ctor(0,
2096				       hash_table_pointer_hash,
2097				       hash_table_pointer_compare);
2098}
2099
2100vec4_visitor::~vec4_visitor()
2101{
2102   hash_table_dtor(this->variable_ht);
2103}
2104
2105
2106void
2107vec4_visitor::fail(const char *format, ...)
2108{
2109   va_list va;
2110   char *msg;
2111
2112   if (failed)
2113      return;
2114
2115   failed = true;
2116
2117   va_start(va, format);
2118   msg = ralloc_vasprintf(mem_ctx, format, va);
2119   va_end(va);
2120   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2121
2122   this->fail_msg = msg;
2123
2124   if (INTEL_DEBUG & DEBUG_VS) {
2125      fprintf(stderr, "%s",  msg);
2126   }
2127}
2128
2129} /* namespace brw */
2130