1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <float.h>
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "pipe/p_context.h"
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "pipe/p_defines.h"
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "pipe/p_state.h"
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_linkage.h"
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_inlines.h"
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_debug.h"
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "pipe/p_shader_tokens.h"
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "tgsi/tgsi_parse.h"
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "tgsi/tgsi_util.h"
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "tgsi/tgsi_dump.h"
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "tgsi/tgsi_ureg.h"
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv30-40_3d.xml.h"
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv30_context.h"
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nvfx_shader.h"
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct nvfx_fpc {
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nv30_fragprog *fp;
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned max_temps;
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned long long r_temps;
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned long long r_temps_discard;
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS];
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_reg *r_temp;
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int num_regs;
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned inst_offset;
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned have_const;
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct util_dynarray imm_data;
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_reg* r_imm;
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned nr_imm;
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct util_dynarray if_stack;
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   //struct util_dynarray loop_stack;
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct util_dynarray label_relocs;
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE struct nvfx_reg
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtemp(struct nvfx_fpc *fpc)
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int idx = __builtin_ctzll(~fpc->r_temps);
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (idx >= fpc->max_temps) {
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NOUVEAU_ERR("out of temps!!\n");
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(0);
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return nvfx_reg(NVFXSR_TEMP, 0);
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_temps |= (1ULL << idx);
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_temps_discard |= (1ULL << idx);
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return nvfx_reg(NVFXSR_TEMP, idx);
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE void
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgrelease_temps(struct nvfx_fpc *fpc)
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_temps &= ~fpc->r_temps_discard;
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_temps_discard = 0ULL;
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic inline struct nvfx_reg
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   float v[4] = {a, b, c, d};
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int idx = fpc->imm_data.size >> 4;
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   memcpy(util_dynarray_grow(&fpc->imm_data, sizeof(float) * 4), v, 4 * sizeof(float));
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return nvfx_reg(NVFXSR_IMM, idx);
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orggrow_insns(struct nvfx_fpc *fpc, int size)
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nv30_fragprog *fp = fpc->fp;
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->insn_len += size;
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgemit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nv30_fragprog *fp = fpc->fp;
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t *hw = &fp->insn[fpc->inst_offset];
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t sr = 0;
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (src.reg.type) {
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_INPUT:
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_OUTPUT:
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= NVFX_FP_REG_SRC_HALF;
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* fall-through */
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_TEMP:
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_IMM:
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!fpc->have_const) {
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         grow_insns(fpc, 4);
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         hw = &fp->insn[fpc->inst_offset];
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpc->have_const = 1;
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      memcpy(&fp->insn[fpc->inst_offset + 4],
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            (float*)fpc->imm_data.data + src.reg.index * 4,
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            sizeof(uint32_t) * 4);
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_CONST:
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!fpc->have_const) {
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         grow_insns(fpc, 4);
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         hw = &fp->insn[fpc->inst_offset];
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpc->have_const = 1;
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         struct nv30_fragprog_data *fpd;
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fp->consts = realloc(fp->consts, ++fp->nr_consts *
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                    sizeof(*fpd));
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpd = &fp->consts[fp->nr_consts - 1];
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpd->offset = fpc->inst_offset + 4;
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpd->index = src.reg.index;
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_NONE:
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(0);
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (src.negate)
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sr |= NVFX_FP_REG_NEGATE;
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (src.abs)
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[1] |= (1 << (29 + pos));
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[pos + 1] |= sr;
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgemit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nv30_fragprog *fp = fpc->fp;
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t *hw = &fp->insn[fpc->inst_offset];
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (dst.type) {
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_OUTPUT:
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (dst.index == 1)
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fp->fp_control |= 0x0000000e;
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else {
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         dst.index <<= 1;
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* fall-through */
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_TEMP:
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (fpc->num_regs < (dst.index + 1))
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpc->num_regs = dst.index + 1;
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case NVFXSR_NONE:
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[0] |= (1 << 30);
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(0);
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nv30_fragprog *fp = fpc->fp;
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t *hw;
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->inst_offset = fp->insn_len;
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->have_const = 0;
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   grow_insns(fpc, 4);
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw = &fp->insn[fpc->inst_offset];
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   memset(hw, 0, sizeof(uint32_t) * 4);
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (insn.op == NVFX_FP_OP_OPCODE_KIL)
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL;
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (insn.sat)
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[0] |= NVFX_FP_OP_OUT_SAT;
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (insn.cc_update)
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if(insn.unit >= 0)
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   emit_dst(fpc, insn.dst);
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   emit_src(fpc, 0, insn.src[0]);
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   emit_src(fpc, 1, insn.src[1]);
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   emit_src(fpc, 2, insn.src[2]);
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/*
 * arith(): build an nvfx_insn for opcode NVFX_FP_OP_OPCODE_<o> with -1 in
 * the third nvfx_insn() argument (presumably "no texture unit"; see the
 * insn.unit >= 0 check in nvfx_fp_emit).  s is forwarded as the first
 * nvfx_insn() argument, d/m are destination and mask, s0..s2 the sources.
 */
#define arith(s, o, d, m, s0, s1, s2) \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, (d), (m), (s0), (s1), (s2))

/*
 * tex(): like arith() but passes unit u.  Only s0 is forwarded; s1 and s2
 * are accepted and ignored, and an identifier named `none` must be in
 * scope wherever this macro is expanded.
 */
#define tex(s, o, u, d, m, s0, s1, s2) \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), (d), (m), (s0), none, none)
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* IF src.x != 0, as TGSI specifies */
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t *hw;
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn.cc_update = 1;
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   nvfx_fp_emit(fpc, insn);
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->inst_offset = fpc->fp->insn_len;
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   grow_insns(fpc, 4);
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw = &fpc->fp->insn[fpc->inst_offset];
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NV40_FP_OP_OUT_NONE |
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Use .xxxx swizzle so that we check only src[0].x*/
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[3] = 0; /* | endif_offset */
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* IF src.x != 0, as TGSI specifies */
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        struct nvfx_relocation reloc;
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        uint32_t *hw;
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        fpc->inst_offset = fpc->fp->insn_len;
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        grow_insns(fpc, 4);
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw = &fpc->fp->insn[fpc->inst_offset];
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* Use .xxxx swizzle so that we check only src[0].x*/
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[3] = 0;
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.target = target;
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.location = fpc->inst_offset + 2;
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv40_fp_ret(struct nvfx_fpc *fpc)
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t *hw;
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->inst_offset = fpc->fp->insn_len;
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   grow_insns(fpc, 4);
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw = &fpc->fp->insn[fpc->inst_offset];
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Use .xxxx swizzle so that we check only src[0].x*/
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[3] = 0;
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        struct nvfx_relocation reloc;
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        uint32_t *hw;
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        fpc->inst_offset = fpc->fp->insn_len;
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        grow_insns(fpc, 4);
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw = &fpc->fp->insn[fpc->inst_offset];
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        NV40_FP_OP_OUT_NONE |
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* Use .xxxx swizzle so that we check only src[0].x*/
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (count << NV40_FP_OP_REP_COUNT3_SHIFT);
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[3] = 0; /* | end_offset */
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.target = target;
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.location = fpc->inst_offset + 3;
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        //util_dynarray_append(&fpc->loop_stack, unsigned, target);
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* warning: this only works forward, and probably only if not inside any IF */
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        struct nvfx_relocation reloc;
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        uint32_t *hw;
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        fpc->inst_offset = fpc->fp->insn_len;
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        grow_insns(fpc, 4);
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw = &fpc->fp->insn[fpc->inst_offset];
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                NV40_FP_OP_OUT_NONE |
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* Use .xxxx swizzle so that we check only src[0].x*/
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        hw[3] = 0; /* | endif_offset */
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.target = target;
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.location = fpc->inst_offset + 2;
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.target = target;
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        reloc.location = fpc->inst_offset + 3;
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv40_fp_brk(struct nvfx_fpc *fpc)
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint32_t *hw;
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->inst_offset = fpc->fp->insn_len;
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   grow_insns(fpc, 4);
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw = &fpc->fp->insn[fpc->inst_offset];
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NV40_FP_OP_OUT_NONE;
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Use .xxxx swizzle so that we check only src[0].x*/
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw[3] = 0;
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE struct nvfx_src
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_src src;
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (fsrc->Register.File) {
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_INPUT:
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg = fpc->r_input[fsrc->Register.Index];
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_CONSTANT:
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index);
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_IMMEDIATE:
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(fsrc->Register.Index < fpc->nr_imm);
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg = fpc->r_imm[fsrc->Register.Index];
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_TEMPORARY:
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg = fpc->r_temp[fsrc->Register.Index];
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* NV40 fragprog result regs are just temps, so this is simple */
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_OUTPUT:
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg = fpc->r_result[fsrc->Register.Index];
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NOUVEAU_ERR("bad src file\n");
395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg.index = 0;
396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      src.reg.type = 0;
397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.abs = fsrc->Register.Absolute;
401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.negate = fsrc->Register.Negate;
402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.swz[0] = fsrc->Register.SwizzleX;
403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.swz[1] = fsrc->Register.SwizzleY;
404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.swz[2] = fsrc->Register.SwizzleZ;
405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.swz[3] = fsrc->Register.SwizzleW;
406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.indirect = 0;
407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.indirect_reg = 0;
408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   src.indirect_swz = 0;
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return src;
410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE struct nvfx_reg
413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (fdst->Register.File) {
415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_OUTPUT:
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return fpc->r_result[fdst->Register.Index];
417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_TEMPORARY:
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return fpc->r_temp[fdst->Register.Index];
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_FILE_NULL:
420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return nvfx_reg(NVFXSR_NONE, 0);
421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return nvfx_reg(NVFXSR_NONE, 0);
424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE int
428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtgsi_mask(uint tgsi)
429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int mask = 0;
431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return mask;
437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic boolean
440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            const struct tgsi_full_instruction *finst)
442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_insn insn;
445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_src src[3], tmp;
446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_reg dst;
447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int mask, sat, unit = 0;
448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int ai = -1, ci = -1, ii = -1;
449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int i;
450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (finst->Instruction.Opcode == TGSI_OPCODE_END)
452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return TRUE;
453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      const struct tgsi_full_src_register *fsrc;
456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fsrc = &finst->Src[i];
458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         src[i] = tgsi_src(fpc, fsrc);
460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      const struct tgsi_full_src_register *fsrc;
465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fsrc = &finst->Src[i];
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (fsrc->Register.File) {
469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_FILE_INPUT:
470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0
471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               || fsrc->Register.SwizzleX == PIPE_SWIZZLE_ALPHA
472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               || fsrc->Register.SwizzleY == PIPE_SWIZZLE_ALPHA
473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_ALPHA
474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               || fsrc->Register.SwizzleW == PIPE_SWIZZLE_ALPHA
475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               )) {
476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */
477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1));
478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addend.swz[0] = fsrc->Register.SwizzleX;
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addend.swz[1] = fsrc->Register.SwizzleY;
480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addend.swz[2] = fsrc->Register.SwizzleZ;
481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addend.swz[3] = fsrc->Register.SwizzleW;
482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = nvfx_src(temp(fpc));
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none));
484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else if (ai == -1 || ai == fsrc->Register.Index) {
485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            ai = fsrc->Register.Index;
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = tgsi_src(fpc, fsrc);
487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else {
488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = nvfx_src(temp(fpc));
489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_FILE_CONSTANT:
493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if ((ci == -1 && ii == -1) ||
494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org             ci == fsrc->Register.Index) {
495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            ci = fsrc->Register.Index;
496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = tgsi_src(fpc, fsrc);
497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else {
498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = nvfx_src(temp(fpc));
499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_FILE_IMMEDIATE:
503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if ((ci == -1 && ii == -1) ||
504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org             ii == fsrc->Register.Index) {
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            ii = fsrc->Register.Index;
506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = tgsi_src(fpc, fsrc);
507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else {
508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src[i] = nvfx_src(temp(fpc));
509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_FILE_TEMPORARY:
513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         /* handled above */
514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_FILE_SAMPLER:
516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         unit = fsrc->Register.Index;
517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_FILE_OUTPUT:
519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         NOUVEAU_ERR("bad src file\n");
522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return FALSE;
523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   dst  = tgsi_dst(fpc, &finst->Dst[0]);
527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (finst->Instruction.Opcode) {
531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_ABS:
532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_ADD:
535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_CEIL:
538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tmp = nvfx_src(temp(fpc));
539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_CMP:
543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(0, MOV, none.reg, mask, src[0], none, none);
544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_update = 1;
545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(sat, MOV, dst, mask, src[2], none, none);
548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_test = NVFX_COND_GE;
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(sat, MOV, dst, mask, src[1], none, none);
552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_test = NVFX_COND_LT;
553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_COS:
556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DDX:
559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DDY:
570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DP2:
581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tmp = nvfx_src(temp(fpc));
582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DP3:
586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DP4:
589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DPH:
592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tmp = nvfx_src(temp(fpc));
593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_DST:
597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_EX2:
600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_FLR:
603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_FRC:
606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_KILP:
609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_KIL:
612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_update = 1;
614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(0, KIL, none.reg, 0, none, none, none);
617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_test = NVFX_COND_LT;
618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_LG2:
621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_LIT:
624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->is_nv4x)
625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none));
626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else {
627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by
628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement
629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          *
630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead
631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          */
632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0));
633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (ci>= 0 || ii >= 0) {
635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            maxs = tmp;
637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_LRP:
645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->is_nv4x)
646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else {
648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_MAD:
654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_MAX:
657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_MIN:
660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_MOV:
663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_MUL:
666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_NOP:
669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_POW:
671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->is_nv4x)
672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else {
674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_RCP:
681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_RFL:
684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->is_nv4x)
685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none));
686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else {
687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none));
689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none));
690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         insn.scale = NVFX_FP_OP_DST_SCALE_2X;
692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, insn);
693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])));
694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_RSQ:
697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->is_nv4x)
698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else {
700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp = nvfx_src(temp(fpc));
701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, insn);
704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SCS:
708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* avoid overwriting the source */
709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (mask & NVFX_FP_MASK_X)
712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (mask & NVFX_FP_MASK_Y)
714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else
717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (mask & NVFX_FP_MASK_Y)
719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (mask & NVFX_FP_MASK_X)
721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SEQ:
725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SFL:
728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none));
729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SGE:
731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SGT:
734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SIN:
737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SLE:
740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SLT:
743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SNE:
746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SSG:
749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X);
751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(sat, MOV, dst, mask, src[0], none, none);
753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_update = 1;
754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = arith(0, STR, dst, mask, none, none, none);
757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn.cc_test = NVFX_COND_GT;
758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, insn);
759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!sat) {
761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         insn = arith(0, MOV, dst, mask, minones, none, none);
762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         insn.cc_test = NVFX_COND_LT;
763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nvfx_fp_emit(fpc, insn);
764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_STR:
768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none));
769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_SUB:
771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none));
772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_TEX:
774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        case TGSI_OPCODE_TRUNC:
777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                tmp = nvfx_src(temp(fpc));
778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                insn = arith(0, MOV, none.reg, mask, src[0], none, none);
779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                insn.cc_update = 1;
780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                nvfx_fp_emit(fpc, insn);
781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));
784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                insn.cc_test = NVFX_COND_LT;
787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                nvfx_fp_emit(fpc, insn);
788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                break;
789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        case TGSI_OPCODE_TXB:
790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                break;
792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        case TGSI_OPCODE_TXL:
793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                if(nvfx->is_nv4x)
794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                else /* unsupported on nv30, use TEX and hope they like it */
796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                        nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                break;
798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        case TGSI_OPCODE_TXP:
799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                break;
801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_XPD:
802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tmp = nvfx_src(temp(fpc));
803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_IF:
808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // MOVRC0 R31 (TR0.xyzw), R<src>:
809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // IF (NE.xxxx) ELSE <else> END <end>
810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nv40_fp_if(fpc, src[0]);
813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_ELSE:
816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint32_t *hw;
818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_ENDIF:
827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint32_t *hw;
829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!hw[2])
834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw[3] = fpc->fp->insn_len;
836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_BRA:
840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* This can in limited cases be implemented with an IF with the else and endif labels pointing to the target */
841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* no state tracker uses this, so don't implement this for now */
842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(0);
843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nv40_fp_bra(fpc, finst->Label.Label);
844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_BGNSUB:
847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_ENDSUB:
848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* nothing to do here */
849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_CAL:
852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nv40_fp_cal(fpc, finst->Label.Label);
855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_RET:
858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nv40_fp_ret(fpc);
861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_BGNLOOP:
864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nv40_fp_rep(fpc, 255, finst->Label.Label);
868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_ENDLOOP:
871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_BRK:
874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!nvfx->use_nv4x)
875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         goto nv3x_cflow;
876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      nv40_fp_brk(fpc);
877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_OPCODE_CONT:
880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      static int warned = 0;
882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!warned) {
883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         warned = 1;
885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        default:
890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return FALSE;
892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgout:
895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   release_temps(fpc);
896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return TRUE;
897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnv3x_cflow:
898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      static int warned = 0;
900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(!warned) {
901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         NOUVEAU_ERR(
902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               "If rendering is incorrect, try to disable GLSL support in the application.\n");
904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         warned = 1;
905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   goto out;
908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic boolean
911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fragprog_parse_decl_input(struct nv30_context *nvfx, struct nvfx_fpc *fpc,
912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                               const struct tgsi_full_declaration *fdec)
913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned idx = fdec->Range.First;
915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned hw;
916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (fdec->Semantic.Name) {
918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_POSITION:
919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = NVFX_FP_OP_INPUT_SRC_POSITION;
920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_COLOR:
922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index;
923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_FOG:
925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = NVFX_FP_OP_INPUT_SRC_FOGC;
926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_FACE:
928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = NV40_FP_OP_INPUT_SRC_FACING;
929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_GENERIC:
931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (fdec->Semantic.Index >= 8)
932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return TRUE;
933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index;
935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fpc->fp->texcoords |= (1 << fdec->Semantic.Index);
936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index);
937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index);
938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(0);
941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return FALSE;
942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return TRUE;
946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic boolean
949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fragprog_assign_generic(struct nv30_context *nvfx, struct nvfx_fpc *fpc,
950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                             const struct tgsi_full_declaration *fdec)
951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned num_texcoords = nvfx->use_nv4x ? 10 : 8;
953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned idx = fdec->Range.First;
954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned hw;
955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (fdec->Semantic.Name) {
957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_GENERIC:
958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (fdec->Semantic.Index >= 8) {
959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (hw = 0; hw < num_texcoords; hw++) {
960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (fpc->fp->texcoord[hw] == 0xffff) {
961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               fpc->fp->texcoord[hw] = fdec->Semantic.Index;
962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               if (hw <= 7) {
963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  fpc->fp->texcoords |= (0x1 << hw);
964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  fpc->fp->vp_or |= (0x00004000 << hw);
965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               } else {
966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  fpc->fp->vp_or |= (0x00001000 << (hw - 8));
967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               }
968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               if (fdec->Semantic.Index == 9)
969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  fpc->fp->point_sprite_control |= (0x00000100 << hw);
970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               return TRUE;
973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return FALSE;
976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return TRUE;
978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return TRUE;
980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic boolean
984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fragprog_parse_decl_output(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            const struct tgsi_full_declaration *fdec)
986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned idx = fdec->Range.First;
988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned hw;
989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (fdec->Semantic.Name) {
991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_POSITION:
992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = 1;
993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case TGSI_SEMANTIC_COLOR:
995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw = ~0;
996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (fdec->Semantic.Index) {
997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case 0: hw = 0; break;
998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case 1: hw = 2; break;
999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case 2: hw = 3; break;
1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case 3: hw = 4; break;
1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if(hw > ((nvfx->use_nv4x) ? 4 : 2)) {
1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         NOUVEAU_ERR("bad rcol index\n");
1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return FALSE;
1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      NOUVEAU_ERR("bad output semantic\n");
1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return FALSE;
1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_temps |= (1ULL << hw);
1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return TRUE;
1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic boolean
1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnvfx_fragprog_prepare(struct nv30_context* nvfx, struct nvfx_fpc *fpc)
1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct tgsi_parse_context p;
1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int high_temp = -1, i;
1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg));
1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_init(&p, fpc->fp->pipe.tokens);
1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (!tgsi_parse_end_of_tokens(&p)) {
1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      const union tgsi_full_token *tok = &p.FullToken;
1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tgsi_parse_token(&p);
1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch(tok->Token.Type) {
1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_TOKEN_TYPE_DECLARATION:
1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         const struct tgsi_full_declaration *fdec;
1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fdec = &p.FullToken.FullDeclaration;
1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         switch (fdec->Declaration.File) {
1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case TGSI_FILE_INPUT:
1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!nvfx_fragprog_parse_decl_input(nvfx, fpc, fdec))
1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               goto out_err;
1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case TGSI_FILE_OUTPUT:
1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               goto out_err;
1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case TGSI_FILE_TEMPORARY:
1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (fdec->Range.Last > high_temp) {
1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               high_temp =
1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  fdec->Range.Last;
1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         default:
1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_TOKEN_TYPE_IMMEDIATE:
1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         struct tgsi_full_immediate *imm;
1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         imm = &p.FullToken.FullImmediate;
1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         assert(fpc->nr_imm < fpc->fp->info.immediate_count);
1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float);
1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_free(&p);
1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_init(&p, fpc->fp->pipe.tokens);
1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (!tgsi_parse_end_of_tokens(&p)) {
1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      const struct tgsi_full_declaration *fdec;
1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tgsi_parse_token(&p);
1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch(p.FullToken.Token.Type) {
1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_TOKEN_TYPE_DECLARATION:
1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fdec = &p.FullToken.FullDeclaration;
1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         switch (fdec->Declaration.File) {
1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case TGSI_FILE_INPUT:
1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!nvfx_fragprog_assign_generic(nvfx, fpc, fdec))
1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               goto out_err;
1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         default:
1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_free(&p);
1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (++high_temp) {
1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (i = 0; i < high_temp; i++)
1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fpc->r_temp[i] = temp(fpc);
1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fpc->r_temps_discard = 0ULL;
1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return TRUE;
1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgout_err:
1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (fpc->r_temp) {
1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      FREE(fpc->r_temp);
1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fpc->r_temp = NULL;
1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_free(&p);
1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return FALSE;
1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgDEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org_nvfx_fragprog_translate(struct nv30_context *nvfx, struct nv30_fragprog *fp,
1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         boolean emulate_sprite_flipping)
1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct tgsi_parse_context parse;
1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct nvfx_fpc *fpc = NULL;
1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct util_dynarray insns;
1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->translated = FALSE;
1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->point_sprite_control = 0;
1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->vp_or = 0;
1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc = CALLOC_STRUCT(nvfx_fpc);
1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!fpc)
1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      goto out_err;
1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->max_temps = nvfx->use_nv4x ? 48 : 32;
1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->fp = fp;
1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->num_regs = 2;
1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   memset(fp->texcoord, 0xff, sizeof(fp->texcoord));
1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned i = 0; i < fp->info.num_properties; ++i) {
1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (fp->info.properties[i].name) {
1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_PROPERTY_FS_COORD_ORIGIN:
1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (fp->info.properties[i].data[0])
1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED;
1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (fp->info.properties[i].data[0])
1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER;
1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (fp->info.properties[i].data[0])
1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            fp->rt_enable |= NV30_3D_RT_ENABLE_MRT;
1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!nvfx_fragprog_prepare(nvfx, fpc))
1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      goto out_err;
1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_init(&parse, fp->pipe.tokens);
1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   util_dynarray_init(&insns);
1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (!tgsi_parse_end_of_tokens(&parse)) {
1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tgsi_parse_token(&parse);
1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (parse.FullToken.Token.Type) {
1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case TGSI_TOKEN_TYPE_INSTRUCTION:
1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         const struct tgsi_full_instruction *finst;
1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         util_dynarray_append(&insns, unsigned, fp->insn_len);
1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         finst = &parse.FullToken.FullInstruction;
1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            goto out_err;
1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   util_dynarray_append(&insns, unsigned, fp->insn_len);
1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   util_dynarray_fini(&insns);
1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if(!nvfx->is_nv4x)
1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fp->fp_control |= (fpc->num_regs-1)/2;
1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   else
1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT;
1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Terminate final instruction */
1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if(fp->insn)
1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fp->insn[fpc->inst_offset] |= 0x00000001;
1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Append NOP + END instruction for branches to the end of the program */
1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fpc->inst_offset = fp->insn_len;
1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   grow_insns(fpc, 4);
1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->insn[fpc->inst_offset + 0] = 0x00000001;
1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->insn[fpc->inst_offset + 1] = 0x00000000;
1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->insn[fpc->inst_offset + 2] = 0x00000000;
1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->insn[fpc->inst_offset + 3] = 0x00000000;
1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if(debug_get_option_nvfx_dump_fp())
1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      debug_printf("\n");
1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tgsi_dump(fp->pipe.tokens, 0);
1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned i = 0; i < fp->insn_len; i += 4)
1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      debug_printf("\n");
1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fp->translated = TRUE;
1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgout:
1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_parse_free(&parse);
1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if(fpc)
1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (fpc->r_temp)
1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         FREE(fpc->r_temp);
1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      util_dynarray_fini(&fpc->if_stack);
1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      util_dynarray_fini(&fpc->label_relocs);
1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      util_dynarray_fini(&fpc->imm_data);
1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      //util_dynarray_fini(&fpc->loop_stack);
1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      FREE(fpc);
1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return;
1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgout_err:
1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   _debug_printf("Error: failed to compile this fragment program:\n");
1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tgsi_dump(fp->pipe.tokens, 0);
1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   goto out;
1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/*
 * Copy len bytes from src to dst.
 *
 * Without PIPE_ARCH_BIG_ENDIAN this is a plain memcpy().  With it, the data
 * is copied one 32-bit word at a time and the two 16-bit halves of every
 * word are exchanged on the way; len is stepped through in units of 4 bytes.
 */
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
#ifdef PIPE_ARCH_BIG_ENDIAN
   const char *in = (const char *)src;
   char *out = (char *)dst;
   size_t off = 0;

   while (off < len) {
      uint32_t word = *(const uint32_t *)(in + off);

      /* Swap the upper and lower 16 bits of the word. */
      *(uint32_t *)(out + off) = (word << 16) | (word >> 16);
      off += 4;
   }
#else
   memcpy(dst, src, len);
#endif
}
1251