1/*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24#include "nine_shader.h"
25
26#include "device9.h"
27#include "nine_debug.h"
28#include "nine_state.h"
29#include "vertexdeclaration9.h"
30
31#include "util/macros.h"
32#include "util/u_memory.h"
33#include "util/u_inlines.h"
34#include "pipe/p_shader_tokens.h"
35#include "tgsi/tgsi_ureg.h"
36#include "tgsi/tgsi_dump.h"
37
/* Debug channel used by the DBG()/DUMP() calls in this file. */
#define DBG_CHANNEL DBG_SHADER

/* Forward a printf-style format and its arguments to _nine_debug_printf()
 * on DBG_CHANNEL, passing NULL as the second argument.
 * Written with the standard C99 variadic form (__VA_ARGS__) rather than the
 * GNU named-variadic extension; every use passes at least a format string. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
41
42
43struct shader_translator;
44
45typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
46
47static inline const char *d3dsio_to_string(unsigned opcode);
48
49
/* Shader-type tags for SM1 vertex / pixel shaders
 * (NOTE(review): presumably the upper half of the version token - confirm
 * against the version parser). */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Nesting limits; they size the label and loop-counter arrays kept in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Payload kind of an immediate operand (sm1_src_param.type). */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file numbered right after D3DSPR_PREDICATE; it marks a
 * source parameter as an immediate. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operator codes. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the per-opcode flag byte inside an instruction token. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values carried by the TEX opcode. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per destination component (x, y, z, w). */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to the four TGSI_SWIZZLE_* arguments for components x,y,z,w. */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Source register in the TGSI constant file at the given index. */
#define NINE_CONSTANT_SRC(index) \
   ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component s of src into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Constant-file source with component s replicated. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
   NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier flags, shifted down to start at bit 0. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
105
/*
 * Source modifiers and where they are valid:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 *
 * Each value below is the D3D token field shifted down to start at bit 0,
 * so it can be used directly as an index into sm1_mod_str[].
 */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
135
136static const char *sm1_mod_str[] =
137{
138    [NINED3DSPSM_NONE] = "",
139    [NINED3DSPSM_NEG] = "-",
140    [NINED3DSPSM_BIAS] = "bias",
141    [NINED3DSPSM_BIASNEG] = "biasneg",
142    [NINED3DSPSM_SIGN] = "sign",
143    [NINED3DSPSM_SIGNNEG] = "signneg",
144    [NINED3DSPSM_COMP] = "comp",
145    [NINED3DSPSM_X2] = "x2",
146    [NINED3DSPSM_X2NEG] = "x2neg",
147    [NINED3DSPSM_DZ] = "dz",
148    [NINED3DSPSM_DW] = "dw",
149    [NINED3DSPSM_ABS] = "abs",
150    [NINED3DSPSM_ABSNEG] = "-abs",
151    [NINED3DSPSM_NOT] = "not"
152};
153
154static void
155sm1_dump_writemask(BYTE mask)
156{
157    if (mask & 1) DUMP("x"); else DUMP("_");
158    if (mask & 2) DUMP("y"); else DUMP("_");
159    if (mask & 4) DUMP("z"); else DUMP("_");
160    if (mask & 8) DUMP("w"); else DUMP("_");
161}
162
163static void
164sm1_dump_swizzle(BYTE s)
165{
166    char c[4] = { 'x', 'y', 'z', 'w' };
167    DUMP("%c%c%c%c",
168         c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
169}
170
171static const char sm1_file_char[] =
172{
173    [D3DSPR_TEMP] = 'r',
174    [D3DSPR_INPUT] = 'v',
175    [D3DSPR_CONST] = 'c',
176    [D3DSPR_ADDR] = 'A',
177    [D3DSPR_RASTOUT] = 'R',
178    [D3DSPR_ATTROUT] = 'D',
179    [D3DSPR_OUTPUT] = 'o',
180    [D3DSPR_CONSTINT] = 'I',
181    [D3DSPR_COLOROUT] = 'C',
182    [D3DSPR_DEPTHOUT] = 'D',
183    [D3DSPR_SAMPLER] = 's',
184    [D3DSPR_CONST2] = 'c',
185    [D3DSPR_CONST3] = 'c',
186    [D3DSPR_CONST4] = 'c',
187    [D3DSPR_CONSTBOOL] = 'B',
188    [D3DSPR_LOOP] = 'L',
189    [D3DSPR_TEMPFLOAT16] = 'h',
190    [D3DSPR_MISCTYPE] = 'M',
191    [D3DSPR_LABEL] = 'X',
192    [D3DSPR_PREDICATE] = 'p'
193};
194
195static void
196sm1_dump_reg(BYTE file, INT index)
197{
198    switch (file) {
199    case D3DSPR_LOOP:
200        DUMP("aL");
201        break;
202    case D3DSPR_COLOROUT:
203        DUMP("oC%i", index);
204        break;
205    case D3DSPR_DEPTHOUT:
206        DUMP("oDepth");
207        break;
208    case D3DSPR_RASTOUT:
209        DUMP("oRast%i", index);
210        break;
211    case D3DSPR_CONSTINT:
212        DUMP("iconst[%i]", index);
213        break;
214    case D3DSPR_CONSTBOOL:
215        DUMP("bconst[%i]", index);
216        break;
217    default:
218        DUMP("%c%i", sm1_file_char[file], index);
219        break;
220    }
221}
222
223struct sm1_src_param
224{
225    INT idx;
226    struct sm1_src_param *rel;
227    BYTE file;
228    BYTE swizzle;
229    BYTE mod;
230    BYTE type;
231    union {
232        DWORD d[4];
233        float f[4];
234        int i[4];
235        BOOL b;
236    } imm;
237};
238static void
239sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
240
241struct sm1_dst_param
242{
243    INT idx;
244    struct sm1_src_param *rel;
245    BYTE file;
246    BYTE mask;
247    BYTE mod;
248    int8_t shift; /* sint4 */
249    BYTE type;
250};
251
252static inline void
253assert_replicate_swizzle(const struct ureg_src *reg)
254{
255    assert(reg->SwizzleY == reg->SwizzleX &&
256           reg->SwizzleZ == reg->SwizzleX &&
257           reg->SwizzleW == reg->SwizzleX);
258}
259
260static void
261sm1_dump_immediate(const struct sm1_src_param *param)
262{
263    switch (param->type) {
264    case NINED3DSPTYPE_FLOAT4:
265        DUMP("{ %f %f %f %f }",
266             param->imm.f[0], param->imm.f[1],
267             param->imm.f[2], param->imm.f[3]);
268        break;
269    case NINED3DSPTYPE_INT4:
270        DUMP("{ %i %i %i %i }",
271             param->imm.i[0], param->imm.i[1],
272             param->imm.i[2], param->imm.i[3]);
273        break;
274    case NINED3DSPTYPE_BOOL:
275        DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
276        break;
277    default:
278        assert(0);
279        break;
280    }
281}
282
283static void
284sm1_dump_src_param(const struct sm1_src_param *param)
285{
286    if (param->file == NINED3DSPR_IMMEDIATE) {
287        assert(!param->mod &&
288               !param->rel &&
289               param->swizzle == NINED3DSP_NOSWIZZLE);
290        sm1_dump_immediate(param);
291        return;
292    }
293
294    if (param->mod)
295        DUMP("%s(", sm1_mod_str[param->mod]);
296    if (param->rel) {
297        DUMP("%c[", sm1_file_char[param->file]);
298        sm1_dump_src_param(param->rel);
299        DUMP("+%i]", param->idx);
300    } else {
301        sm1_dump_reg(param->file, param->idx);
302    }
303    if (param->mod)
304       DUMP(")");
305    if (param->swizzle != NINED3DSP_NOSWIZZLE) {
306       DUMP(".");
307       sm1_dump_swizzle(param->swizzle);
308    }
309}
310
311static void
312sm1_dump_dst_param(const struct sm1_dst_param *param)
313{
314   if (param->mod & NINED3DSPDM_SATURATE)
315      DUMP("sat ");
316   if (param->mod & NINED3DSPDM_PARTIALP)
317      DUMP("pp ");
318   if (param->mod & NINED3DSPDM_CENTROID)
319      DUMP("centroid ");
320   if (param->shift < 0)
321      DUMP("/%u ", 1 << -param->shift);
322   if (param->shift > 0)
323      DUMP("*%u ", 1 << param->shift);
324
325   if (param->rel) {
326      DUMP("%c[", sm1_file_char[param->file]);
327      sm1_dump_src_param(param->rel);
328      DUMP("+%i]", param->idx);
329   } else {
330      sm1_dump_reg(param->file, param->idx);
331   }
332   if (param->mask != NINED3DSP_WRITEMASK_ALL) {
333      DUMP(".");
334      sm1_dump_writemask(param->mask);
335   }
336}
337
338struct sm1_semantic
339{
340   struct sm1_dst_param reg;
341   BYTE sampler_type;
342   D3DDECLUSAGE usage;
343   BYTE usage_idx;
344};
345
346struct sm1_op_info
347{
348    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
349     * should be ignored completely */
350    unsigned sio;
351    unsigned opcode; /* TGSI_OPCODE_x */
352
353    /* versions are still set even handler is set */
354    struct {
355        unsigned min;
356        unsigned max;
357    } vert_version, frag_version;
358
359    /* number of regs parsed outside of special handler */
360    unsigned ndst;
361    unsigned nsrc;
362
363    /* some instructions don't map perfectly, so use a special handler */
364    translate_instruction_func handler;
365};
366
367struct sm1_instruction
368{
369    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
370    BYTE flags;
371    BOOL coissue;
372    BOOL predicated;
373    BYTE ndst;
374    BYTE nsrc;
375    struct sm1_src_param src[4];
376    struct sm1_src_param src_rel[4];
377    struct sm1_src_param pred;
378    struct sm1_src_param dst_rel[1];
379    struct sm1_dst_param dst[1];
380
381    struct sm1_op_info *info;
382};
383
384static void
385sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
386{
387    unsigned i;
388
389    /* no info stored for these: */
390    if (insn->opcode == D3DSIO_DCL)
391        return;
392    for (i = 0; i < indent; ++i)
393        DUMP("  ");
394
395    if (insn->predicated) {
396        DUMP("@");
397        sm1_dump_src_param(&insn->pred);
398        DUMP(" ");
399    }
400    DUMP("%s", d3dsio_to_string(insn->opcode));
401    if (insn->flags) {
402        switch (insn->opcode) {
403        case D3DSIO_TEX:
404            DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
405            break;
406        default:
407            DUMP("_%x", insn->flags);
408            break;
409        }
410    }
411    if (insn->coissue)
412        DUMP("_co");
413    DUMP(" ");
414
415    for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
416        sm1_dump_dst_param(&insn->dst[i]);
417        DUMP(" ");
418    }
419
420    for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
421        sm1_dump_src_param(&insn->src[i]);
422        DUMP(" ");
423    }
424    if (insn->opcode == D3DSIO_DEF ||
425        insn->opcode == D3DSIO_DEFI ||
426        insn->opcode == D3DSIO_DEFB)
427        sm1_dump_immediate(&insn->src[0]);
428
429    DUMP("\n");
430}
431
432struct sm1_local_const
433{
434    INT idx;
435    struct ureg_src reg;
436    float f[4]; /* for indirect addressing of float constants */
437};
438
439struct shader_translator
440{
441    const DWORD *byte_code;
442    const DWORD *parse;
443    const DWORD *parse_next;
444
445    struct ureg_program *ureg;
446
447    /* shader version */
448    struct {
449        BYTE major;
450        BYTE minor;
451    } version;
452    unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
453    unsigned num_constf_allowed;
454    unsigned num_consti_allowed;
455    unsigned num_constb_allowed;
456
457    boolean native_integers;
458    boolean inline_subroutines;
459    boolean lower_preds;
460    boolean want_texcoord;
461    boolean shift_wpos;
462    boolean wpos_is_sysval;
463    boolean face_is_sysval_integer;
464    unsigned texcoord_sn;
465
466    struct sm1_instruction insn; /* current instruction */
467
468    struct {
469        struct ureg_dst *r;
470        struct ureg_dst oPos;
471        struct ureg_dst oPos_out; /* the real output when doing streamout */
472        struct ureg_dst oFog;
473        struct ureg_dst oPts;
474        struct ureg_dst oCol[4];
475        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
476        struct ureg_dst oDepth;
477        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
478        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
479        struct ureg_src vPos;
480        struct ureg_src vFace;
481        struct ureg_src s;
482        struct ureg_dst p;
483        struct ureg_dst address;
484        struct ureg_dst a0;
485        struct ureg_dst tS[8]; /* texture stage registers */
486        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
487        struct ureg_dst t[5]; /* scratch TEMPs */
488        struct ureg_src vC[2]; /* PS color in */
489        struct ureg_src vT[8]; /* PS texcoord in */
490        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
491    } regs;
492    unsigned num_temp; /* ARRAY_SIZE(regs.r) */
493    unsigned num_scratch;
494    unsigned loop_depth;
495    unsigned loop_depth_max;
496    unsigned cond_depth;
497    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
498    unsigned cond_labels[NINE_MAX_COND_DEPTH];
499    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
500
501    unsigned *inst_labels; /* LABEL op */
502    unsigned num_inst_labels;
503
504    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
505
506    struct sm1_local_const *lconstf;
507    unsigned num_lconstf;
508    struct sm1_local_const *lconsti;
509    unsigned num_lconsti;
510    struct sm1_local_const *lconstb;
511    unsigned num_lconstb;
512
513    boolean indirect_const_access;
514    boolean failure;
515
516    struct nine_vs_output_info output_info[16];
517    int num_outputs;
518
519    struct nine_shader_info *info;
520
521    int16_t op_info_map[D3DSIO_BREAKP + 1];
522};
523
/* Stage tests; both expect a variable named tx to be in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from the enclosing void
 * function when cond holds. Callers in this file invoke it without a
 * trailing semicolon, so the expansion stays a plain if-statement. */
#define FAILURE_VOID(cond) \
    if ((cond)) {tx->failure=1;return;}

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
531
532static void
533sm1_instruction_check(const struct sm1_instruction *insn)
534{
535    if (insn->opcode == D3DSIO_CRS)
536    {
537        if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
538        {
539            DBG("CRS.mask.w\n");
540        }
541    }
542}
543
544static void
545nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
546                    int mask, int output_index)
547{
548    tx->output_info[tx->num_outputs].output_semantic = Usage;
549    tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
550    tx->output_info[tx->num_outputs].mask = mask;
551    tx->output_info[tx->num_outputs].output_index = output_index;
552    tx->num_outputs++;
553}
554
555static boolean
556tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
557{
558   INT i;
559
560   if (index < 0 || index >= tx->num_constf_allowed) {
561       tx->failure = TRUE;
562       return FALSE;
563   }
564   for (i = 0; i < tx->num_lconstf; ++i) {
565      if (tx->lconstf[i].idx == index) {
566         *src = tx->lconstf[i].reg;
567         return TRUE;
568      }
569   }
570   return FALSE;
571}
572static boolean
573tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
574{
575   int i;
576
577   if (index < 0 || index >= tx->num_consti_allowed) {
578       tx->failure = TRUE;
579       return FALSE;
580   }
581   for (i = 0; i < tx->num_lconsti; ++i) {
582      if (tx->lconsti[i].idx == index) {
583         *src = tx->lconsti[i].reg;
584         return TRUE;
585      }
586   }
587   return FALSE;
588}
589static boolean
590tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
591{
592   int i;
593
594   if (index < 0 || index >= tx->num_constb_allowed) {
595       tx->failure = TRUE;
596       return FALSE;
597   }
598   for (i = 0; i < tx->num_lconstb; ++i) {
599      if (tx->lconstb[i].idx == index) {
600         *src = tx->lconstb[i].reg;
601         return TRUE;
602      }
603   }
604   return FALSE;
605}
606
607static void
608tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
609{
610    unsigned n;
611
612    FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
613
614    for (n = 0; n < tx->num_lconstf; ++n)
615        if (tx->lconstf[n].idx == index)
616            break;
617    if (n == tx->num_lconstf) {
618       if ((n % 8) == 0) {
619          tx->lconstf = REALLOC(tx->lconstf,
620                                (n + 0) * sizeof(tx->lconstf[0]),
621                                (n + 8) * sizeof(tx->lconstf[0]));
622          assert(tx->lconstf);
623       }
624       tx->num_lconstf++;
625    }
626    tx->lconstf[n].idx = index;
627    tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
628
629    memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
630}
631static void
632tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
633{
634    unsigned n;
635
636    FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
637
638    for (n = 0; n < tx->num_lconsti; ++n)
639        if (tx->lconsti[n].idx == index)
640            break;
641    if (n == tx->num_lconsti) {
642       if ((n % 8) == 0) {
643          tx->lconsti = REALLOC(tx->lconsti,
644                                (n + 0) * sizeof(tx->lconsti[0]),
645                                (n + 8) * sizeof(tx->lconsti[0]));
646          assert(tx->lconsti);
647       }
648       tx->num_lconsti++;
649    }
650
651    tx->lconsti[n].idx = index;
652    tx->lconsti[n].reg = tx->native_integers ?
653       ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
654       ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
655}
656static void
657tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
658{
659    unsigned n;
660
661    FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
662
663    for (n = 0; n < tx->num_lconstb; ++n)
664        if (tx->lconstb[n].idx == index)
665            break;
666    if (n == tx->num_lconstb) {
667       if ((n % 8) == 0) {
668          tx->lconstb = REALLOC(tx->lconstb,
669                                (n + 0) * sizeof(tx->lconstb[0]),
670                                (n + 8) * sizeof(tx->lconstb[0]));
671          assert(tx->lconstb);
672       }
673       tx->num_lconstb++;
674    }
675
676    tx->lconstb[n].idx = index;
677    tx->lconstb[n].reg = tx->native_integers ?
678       ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
679       ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
680}
681
682static inline struct ureg_dst
683tx_scratch(struct shader_translator *tx)
684{
685    if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
686        tx->failure = TRUE;
687        return tx->regs.t[0];
688    }
689    if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
690        tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
691    return tx->regs.t[tx->num_scratch++];
692}
693
694static inline struct ureg_dst
695tx_scratch_scalar(struct shader_translator *tx)
696{
697    return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
698}
699
700static inline struct ureg_src
701tx_src_scalar(struct ureg_dst dst)
702{
703    struct ureg_src src = ureg_src(dst);
704    int c = ffs(dst.WriteMask) - 1;
705    if (dst.WriteMask == (1 << c))
706        src = ureg_scalar(src, c);
707    return src;
708}
709
710static inline void
711tx_temp_alloc(struct shader_translator *tx, INT idx)
712{
713    assert(idx >= 0);
714    if (idx >= tx->num_temp) {
715       unsigned k = tx->num_temp;
716       unsigned n = idx + 1;
717       tx->regs.r = REALLOC(tx->regs.r,
718                            k * sizeof(tx->regs.r[0]),
719                            n * sizeof(tx->regs.r[0]));
720       for (; k < n; ++k)
721          tx->regs.r[k] = ureg_dst_undef();
722       tx->num_temp = n;
723    }
724    if (ureg_dst_is_undef(tx->regs.r[idx]))
725        tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
726}
727
728static inline void
729tx_addr_alloc(struct shader_translator *tx, INT idx)
730{
731    assert(idx == 0);
732    if (ureg_dst_is_undef(tx->regs.address))
733        tx->regs.address = ureg_DECL_address(tx->ureg);
734    if (ureg_dst_is_undef(tx->regs.a0))
735        tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
736}
737
738static inline void
739tx_pred_alloc(struct shader_translator *tx, INT idx)
740{
741    assert(idx == 0);
742    if (ureg_dst_is_undef(tx->regs.p))
743        tx->regs.p = ureg_DECL_predicate(tx->ureg);
744}
745
746/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
747 * the projection should be applied on the texture. It doesn't
748 * apply on texkill.
749 * The doc is very imprecise here (it says the projection is done
750 * before rasterization, thus in vs, which seems wrong since ps instructions
751 * are affected differently)
752 * For now we only apply to the ps TEX instruction and TEXBEM.
753 * Perhaps some other instructions would need it */
754static inline void
755apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
756                      struct ureg_src src, INT idx)
757{
758    struct ureg_dst tmp;
759    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
760
761    /* no projection */
762    if (dim == 1) {
763        ureg_MOV(tx->ureg, dst, src);
764    } else {
765        tmp = tx_scratch_scalar(tx);
766        ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
767        ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
768    }
769}
770
771static inline void
772TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
773                         unsigned target, struct ureg_src src0,
774                         struct ureg_src src1, INT idx)
775{
776    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
777    struct ureg_dst tmp;
778
779    /* dim == 1: no projection
780     * Looks like must be disabled when it makes no
781     * sense according the texture dimensions
782     */
783    if (dim == 1 || dim <= target) {
784        ureg_TEX(tx->ureg, dst, target, src0, src1);
785    } else if (dim == 4) {
786        ureg_TXP(tx->ureg, dst, target, src0, src1);
787    } else {
788        tmp = tx_scratch(tx);
789        apply_ps1x_projection(tx, tmp, src0, idx);
790        ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
791    }
792}
793
794static inline void
795tx_texcoord_alloc(struct shader_translator *tx, INT idx)
796{
797    assert(IS_PS);
798    assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
799    if (ureg_src_is_undef(tx->regs.vT[idx]))
800       tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
801                                             TGSI_INTERPOLATE_PERSPECTIVE);
802}
803
804static inline unsigned *
805tx_bgnloop(struct shader_translator *tx)
806{
807    tx->loop_depth++;
808    if (tx->loop_depth_max < tx->loop_depth)
809        tx->loop_depth_max = tx->loop_depth;
810    assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
811    return &tx->loop_labels[tx->loop_depth - 1];
812}
813
814static inline unsigned *
815tx_endloop(struct shader_translator *tx)
816{
817    assert(tx->loop_depth);
818    tx->loop_depth--;
819    ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
820                     ureg_get_instruction_number(tx->ureg));
821    return &tx->loop_labels[tx->loop_depth];
822}
823
824static struct ureg_dst
825tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
826{
827    const unsigned l = tx->loop_depth - 1;
828
829    if (!tx->loop_depth)
830    {
831        DBG("loop counter requested outside of loop\n");
832        return ureg_dst_undef();
833    }
834
835    if (ureg_dst_is_undef(tx->regs.rL[l])) {
836        /* loop or rep ctr creation */
837        tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
838        tx->loop_or_rep[l] = loop_or_rep;
839    }
840    /* loop - rep - endloop - endrep not allowed */
841    assert(tx->loop_or_rep[l] == loop_or_rep);
842
843    return tx->regs.rL[l];
844}
845
846static struct ureg_src
847tx_get_loopal(struct shader_translator *tx)
848{
849    int loop_level = tx->loop_depth - 1;
850
851    while (loop_level >= 0) {
852        /* handle loop - rep - endrep - endloop case */
853        if (tx->loop_or_rep[loop_level])
854            /* the value is in the loop counter y component (nine implementation) */
855            return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
856        loop_level--;
857    }
858
859    DBG("aL counter requested outside of loop\n");
860    return ureg_src_undef();
861}
862
863static inline unsigned *
864tx_cond(struct shader_translator *tx)
865{
866   assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
867   tx->cond_depth++;
868   return &tx->cond_labels[tx->cond_depth - 1];
869}
870
871static inline unsigned *
872tx_elsecond(struct shader_translator *tx)
873{
874   assert(tx->cond_depth);
875   return &tx->cond_labels[tx->cond_depth - 1];
876}
877
878static inline void
879tx_endcond(struct shader_translator *tx)
880{
881   assert(tx->cond_depth);
882   tx->cond_depth--;
883   ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
884                    ureg_get_instruction_number(tx->ureg));
885}
886
887static inline struct ureg_dst
888nine_ureg_dst_register(unsigned file, int index)
889{
890    return ureg_dst(ureg_src_register(file, index));
891}
892
893static inline struct ureg_src
894nine_get_position_input(struct shader_translator *tx)
895{
896    struct ureg_program *ureg = tx->ureg;
897
898    if (tx->wpos_is_sysval)
899        return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
900    else
901        return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
902                                  0, TGSI_INTERPOLATE_LINEAR);
903}
904
905static struct ureg_src
906tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
907{
908    struct ureg_program *ureg = tx->ureg;
909    struct ureg_src src;
910    struct ureg_dst tmp;
911
912    switch (param->file)
913    {
914    case D3DSPR_TEMP:
915        assert(!param->rel);
916        tx_temp_alloc(tx, param->idx);
917        src = ureg_src(tx->regs.r[param->idx]);
918        break;
919 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
920    case D3DSPR_ADDR:
921        assert(!param->rel);
922        if (IS_VS) {
923            assert(param->idx == 0);
924            /* the address register (vs only) must be
925             * assigned before use */
926            assert(!ureg_dst_is_undef(tx->regs.a0));
927            /* Round to lowest for vs1.1 (contrary to the doc), else
928             * round to nearest */
929            if (tx->version.major < 2 && tx->version.minor < 2)
930                ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
931            else
932                ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
933            src = ureg_src(tx->regs.address);
934        } else {
935            if (tx->version.major < 2 && tx->version.minor < 4) {
936                /* no subroutines, so should be defined */
937                src = ureg_src(tx->regs.tS[param->idx]);
938            } else {
939                tx_texcoord_alloc(tx, param->idx);
940                src = tx->regs.vT[param->idx];
941            }
942        }
943        break;
944    case D3DSPR_INPUT:
945        if (IS_VS) {
946            src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
947        } else {
948            if (tx->version.major < 3) {
949                assert(!param->rel);
950                src = ureg_DECL_fs_input_cyl_centroid(
951                    ureg, TGSI_SEMANTIC_COLOR, param->idx,
952                    TGSI_INTERPOLATE_COLOR, 0,
953                    tx->info->force_color_in_centroid ?
954                      TGSI_INTERPOLATE_LOC_CENTROID : 0,
955                    0, 1);
956            } else {
957                if(param->rel) {
958                    /* Copy all inputs (non consecutive)
959                     * to temp array (consecutive).
960                     * This is not good for performance.
961                     * A better way would be to have inputs
962                     * consecutive (would need implement alternative
963                     * way to match vs outputs and ps inputs).
964                     * However even with the better way, the temp array
965                     * copy would need to be used if some inputs
966                     * are not GENERIC or if they have different
967                     * interpolation flag. */
968                    if (ureg_src_is_undef(tx->regs.v_consecutive)) {
969                        int i;
970                        tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
971                        for (i = 0; i < 10; i++) {
972                            if (!ureg_src_is_undef(tx->regs.v[i]))
973                                ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
974                            else
975                                ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
976                        }
977                    }
978                    src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
979                } else {
980                    assert(param->idx < ARRAY_SIZE(tx->regs.v));
981                    src = tx->regs.v[param->idx];
982                }
983            }
984        }
985        break;
986    case D3DSPR_PREDICATE:
987        assert(!param->rel);
988        tx_pred_alloc(tx, param->idx);
989        src = ureg_src(tx->regs.p);
990        break;
991    case D3DSPR_SAMPLER:
992        assert(param->mod == NINED3DSPSM_NONE);
993        assert(param->swizzle == NINED3DSP_NOSWIZZLE);
994        assert(!param->rel);
995        src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
996        break;
997    case D3DSPR_CONST:
998        assert(!param->rel || IS_VS);
999        if (param->rel)
1000            tx->indirect_const_access = TRUE;
1001        if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1002            if (!param->rel)
1003                nine_info_mark_const_f_used(tx->info, param->idx);
1004            /* vswp constant handling: we use two buffers
1005             * to fit all the float constants. The special handling
1006             * doesn't need to be elsewhere, because all the instructions
1007             * accessing the constants directly are VS1, and swvp
1008             * is VS >= 2 */
1009            if (IS_VS && tx->info->swvp_on) {
1010                if (!param->rel) {
1011                    if (param->idx < 4096) {
1012                        src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1013                        src = ureg_src_dimension(src, 0);
1014                    } else {
1015                        src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
1016                        src = ureg_src_dimension(src, 1);
1017                    }
1018                } else {
1019                    src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
1020                    src = ureg_src_dimension(src, 0);
1021                }
1022            } else
1023                src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1024        }
1025        if (!IS_VS && tx->version.major < 2) {
1026            /* ps 1.X clamps constants */
1027            tmp = tx_scratch(tx);
1028            ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1029            ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1030            src = ureg_src(tmp);
1031        }
1032        break;
1033    case D3DSPR_CONST2:
1034    case D3DSPR_CONST3:
1035    case D3DSPR_CONST4:
1036        DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1037        assert(!"CONST2/3/4");
1038        src = ureg_imm1f(ureg, 0.0f);
1039        break;
1040    case D3DSPR_CONSTINT:
1041        /* relative adressing only possible for float constants in vs */
1042        assert(!param->rel);
1043        if (!tx_lconsti(tx, &src, param->idx)) {
1044            nine_info_mark_const_i_used(tx->info, param->idx);
1045            if (IS_VS && tx->info->swvp_on) {
1046                src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1047                src = ureg_src_dimension(src, 2);
1048            } else
1049                src = ureg_src_register(TGSI_FILE_CONSTANT,
1050                                        tx->info->const_i_base + param->idx);
1051        }
1052        break;
1053    case D3DSPR_CONSTBOOL:
1054        assert(!param->rel);
1055        if (!tx_lconstb(tx, &src, param->idx)) {
1056           char r = param->idx / 4;
1057           char s = param->idx & 3;
1058           nine_info_mark_const_b_used(tx->info, param->idx);
1059           if (IS_VS && tx->info->swvp_on) {
1060               src = ureg_src_register(TGSI_FILE_CONSTANT, r);
1061               src = ureg_src_dimension(src, 3);
1062           } else
1063               src = ureg_src_register(TGSI_FILE_CONSTANT,
1064                                       tx->info->const_b_base + r);
1065           src = ureg_swizzle(src, s, s, s, s);
1066        }
1067        break;
1068    case D3DSPR_LOOP:
1069        if (ureg_dst_is_undef(tx->regs.address))
1070            tx->regs.address = ureg_DECL_address(ureg);
1071        if (!tx->native_integers)
1072            ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1073        else
1074            ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1075        src = ureg_src(tx->regs.address);
1076        break;
1077    case D3DSPR_MISCTYPE:
1078        switch (param->idx) {
1079        case D3DSMO_POSITION:
1080           if (ureg_src_is_undef(tx->regs.vPos))
1081              tx->regs.vPos = nine_get_position_input(tx);
1082           if (tx->shift_wpos) {
1083               /* TODO: do this only once */
1084               struct ureg_dst wpos = tx_scratch(tx);
1085               ureg_ADD(ureg, wpos, tx->regs.vPos,
1086                        ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1087               src = ureg_src(wpos);
1088           } else {
1089               src = tx->regs.vPos;
1090           }
1091           break;
1092        case D3DSMO_FACE:
1093           if (ureg_src_is_undef(tx->regs.vFace)) {
1094               if (tx->face_is_sysval_integer) {
1095                   tmp = tx_scratch(tx);
1096                   tx->regs.vFace =
1097                       ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1098
1099                   /* convert bool to float */
1100                   ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1101                             ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1102                   tx->regs.vFace = ureg_src(tmp);
1103               } else {
1104                   tx->regs.vFace = ureg_DECL_fs_input(ureg,
1105                                                       TGSI_SEMANTIC_FACE, 0,
1106                                                       TGSI_INTERPOLATE_CONSTANT);
1107               }
1108               tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1109           }
1110           src = tx->regs.vFace;
1111           break;
1112        default:
1113            assert(!"invalid src D3DSMO");
1114            break;
1115        }
1116        assert(!param->rel);
1117        break;
1118    case D3DSPR_TEMPFLOAT16:
1119        break;
1120    default:
1121        assert(!"invalid src D3DSPR");
1122    }
1123    if (param->rel)
1124        src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1125
1126    switch (param->mod) {
1127    case NINED3DSPSM_DW:
1128        tmp = tx_scratch(tx);
1129        /* NOTE: app is not allowed to read w with this modifier */
1130        ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1131        ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1132        src = ureg_src(tmp);
1133        break;
1134    case NINED3DSPSM_DZ:
1135        tmp = tx_scratch(tx);
1136        /* NOTE: app is not allowed to read z with this modifier */
1137        ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1138        ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1139        src = ureg_src(tmp);
1140        break;
1141    default:
1142        break;
1143    }
1144
1145    if (param->swizzle != NINED3DSP_NOSWIZZLE)
1146        src = ureg_swizzle(src,
1147                           (param->swizzle >> 0) & 0x3,
1148                           (param->swizzle >> 2) & 0x3,
1149                           (param->swizzle >> 4) & 0x3,
1150                           (param->swizzle >> 6) & 0x3);
1151
1152    switch (param->mod) {
1153    case NINED3DSPSM_ABS:
1154        src = ureg_abs(src);
1155        break;
1156    case NINED3DSPSM_ABSNEG:
1157        src = ureg_negate(ureg_abs(src));
1158        break;
1159    case NINED3DSPSM_NEG:
1160        src = ureg_negate(src);
1161        break;
1162    case NINED3DSPSM_BIAS:
1163        tmp = tx_scratch(tx);
1164        ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1165        src = ureg_src(tmp);
1166        break;
1167    case NINED3DSPSM_BIASNEG:
1168        tmp = tx_scratch(tx);
1169        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1170        src = ureg_src(tmp);
1171        break;
1172    case NINED3DSPSM_NOT:
1173        if (tx->native_integers) {
1174            tmp = tx_scratch(tx);
1175            ureg_NOT(ureg, tmp, src);
1176            src = ureg_src(tmp);
1177            break;
1178        }
1179        /* fall through */
1180    case NINED3DSPSM_COMP:
1181        tmp = tx_scratch(tx);
1182        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1183        src = ureg_src(tmp);
1184        break;
1185    case NINED3DSPSM_DZ:
1186    case NINED3DSPSM_DW:
1187        /* Already handled*/
1188        break;
1189    case NINED3DSPSM_SIGN:
1190        tmp = tx_scratch(tx);
1191        ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1192        src = ureg_src(tmp);
1193        break;
1194    case NINED3DSPSM_SIGNNEG:
1195        tmp = tx_scratch(tx);
1196        ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1197        src = ureg_src(tmp);
1198        break;
1199    case NINED3DSPSM_X2:
1200        tmp = tx_scratch(tx);
1201        ureg_ADD(ureg, tmp, src, src);
1202        src = ureg_src(tmp);
1203        break;
1204    case NINED3DSPSM_X2NEG:
1205        tmp = tx_scratch(tx);
1206        ureg_ADD(ureg, tmp, src, src);
1207        src = ureg_negate(ureg_src(tmp));
1208        break;
1209    default:
1210        assert(param->mod == NINED3DSPSM_NONE);
1211        break;
1212    }
1213
1214    return src;
1215}
1216
1217static struct ureg_dst
1218_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1219{
1220    struct ureg_dst dst;
1221
1222    switch (param->file)
1223    {
1224    case D3DSPR_TEMP:
1225        assert(!param->rel);
1226        tx_temp_alloc(tx, param->idx);
1227        dst = tx->regs.r[param->idx];
1228        break;
1229 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1230    case D3DSPR_ADDR:
1231        assert(!param->rel);
1232        if (tx->version.major < 2 && !IS_VS) {
1233            if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1234                tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1235            dst = tx->regs.tS[param->idx];
1236        } else
1237        if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1238            tx_texcoord_alloc(tx, param->idx);
1239            dst = ureg_dst(tx->regs.vT[param->idx]);
1240        } else {
1241            tx_addr_alloc(tx, param->idx);
1242            dst = tx->regs.a0;
1243        }
1244        break;
1245    case D3DSPR_RASTOUT:
1246        assert(!param->rel);
1247        switch (param->idx) {
1248        case 0:
1249            if (ureg_dst_is_undef(tx->regs.oPos))
1250                tx->regs.oPos =
1251                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1252            dst = tx->regs.oPos;
1253            break;
1254        case 1:
1255            if (ureg_dst_is_undef(tx->regs.oFog))
1256                tx->regs.oFog =
1257                    ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1258            dst = tx->regs.oFog;
1259            break;
1260        case 2:
1261            if (ureg_dst_is_undef(tx->regs.oPts))
1262                tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1263            dst = tx->regs.oPts;
1264            break;
1265        default:
1266            assert(0);
1267            break;
1268        }
1269        break;
1270 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1271    case D3DSPR_OUTPUT:
1272        if (tx->version.major < 3) {
1273            assert(!param->rel);
1274            dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1275        } else {
1276            assert(!param->rel); /* TODO */
1277            assert(param->idx < ARRAY_SIZE(tx->regs.o));
1278            dst = tx->regs.o[param->idx];
1279        }
1280        break;
1281    case D3DSPR_ATTROUT: /* VS */
1282    case D3DSPR_COLOROUT: /* PS */
1283        assert(param->idx >= 0 && param->idx < 4);
1284        assert(!param->rel);
1285        tx->info->rt_mask |= 1 << param->idx;
1286        if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1287            /* ps < 3: oCol[0] will have fog blending afterward */
1288            if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1289                tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1290            } else {
1291                tx->regs.oCol[param->idx] =
1292                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1293            }
1294        }
1295        dst = tx->regs.oCol[param->idx];
1296        if (IS_VS && tx->version.major < 3)
1297            dst = ureg_saturate(dst);
1298        break;
1299    case D3DSPR_DEPTHOUT:
1300        assert(!param->rel);
1301        if (ureg_dst_is_undef(tx->regs.oDepth))
1302           tx->regs.oDepth =
1303              ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1304                                      TGSI_WRITEMASK_Z, 0, 1);
1305        dst = tx->regs.oDepth; /* XXX: must write .z component */
1306        break;
1307    case D3DSPR_PREDICATE:
1308        assert(!param->rel);
1309        tx_pred_alloc(tx, param->idx);
1310        dst = tx->regs.p;
1311        break;
1312    case D3DSPR_TEMPFLOAT16:
1313        DBG("unhandled D3DSPR: %u\n", param->file);
1314        break;
1315    default:
1316        assert(!"invalid dst D3DSPR");
1317        break;
1318    }
1319    if (param->rel)
1320        dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1321
1322    if (param->mask != NINED3DSP_WRITEMASK_ALL)
1323        dst = ureg_writemask(dst, param->mask);
1324    if (param->mod & NINED3DSPDM_SATURATE)
1325        dst = ureg_saturate(dst);
1326
1327    return dst;
1328}
1329
1330static struct ureg_dst
1331tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1332{
1333    if (param->shift) {
1334        tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1335        return tx->regs.tdst;
1336    }
1337    return _tx_dst_param(tx, param);
1338}
1339
1340static void
1341tx_apply_dst0_modifiers(struct shader_translator *tx)
1342{
1343    struct ureg_dst rdst;
1344    float f;
1345
1346    if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1347        return;
1348    rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1349
1350    assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1351
1352    if (tx->insn.dst[0].shift < 0)
1353        f = 1.0f / (1 << -tx->insn.dst[0].shift);
1354    else
1355        f = 1 << tx->insn.dst[0].shift;
1356
1357    ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1358}
1359
1360static struct ureg_src
1361tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1362{
1363    struct ureg_src src;
1364
1365    assert(!param->shift);
1366    assert(!(param->mod & NINED3DSPDM_SATURATE));
1367
1368    switch (param->file) {
1369    case D3DSPR_INPUT:
1370        if (IS_VS) {
1371            src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1372        } else {
1373            assert(!param->rel);
1374            assert(param->idx < ARRAY_SIZE(tx->regs.v));
1375            src = tx->regs.v[param->idx];
1376        }
1377        break;
1378    default:
1379        src = ureg_src(tx_dst_param(tx, param));
1380        break;
1381    }
1382    if (param->rel)
1383        src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1384
1385    if (!param->mask)
1386        WARN("mask is 0, using identity swizzle\n");
1387
1388    if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1389        char s[4];
1390        int n;
1391        int c;
1392        for (n = 0, c = 0; c < 4; ++c)
1393            if (param->mask & (1 << c))
1394                s[n++] = c;
1395        assert(n);
1396        for (c = n; c < 4; ++c)
1397            s[c] = s[n - 1];
1398        src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1399    }
1400    return src;
1401}
1402
1403static HRESULT
1404NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1405{
1406    struct ureg_program *ureg = tx->ureg;
1407    struct ureg_dst dst;
1408    struct ureg_src src[2];
1409    struct sm1_src_param *src_mat = &tx->insn.src[1];
1410    unsigned i;
1411
1412    dst = tx_dst_param(tx, &tx->insn.dst[0]);
1413    src[0] = tx_src_param(tx, &tx->insn.src[0]);
1414
1415    for (i = 0; i < n; i++)
1416    {
1417        const unsigned m = (1 << i);
1418
1419        src[1] = tx_src_param(tx, src_mat);
1420        src_mat->idx++;
1421
1422        if (!(dst.WriteMask & m))
1423            continue;
1424
1425        /* XXX: src == dst case ? */
1426
1427        switch (k) {
1428        case 3:
1429            ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1430            break;
1431        case 4:
1432            ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1433            break;
1434        default:
1435            DBG("invalid operation: M%ux%u\n", m, n);
1436            break;
1437        }
1438    }
1439
1440    return D3D_OK;
1441}
1442
/* Expands to two zero values; presumably used as the (min, max) version
 * pair of an opcode that a shader stage does not support -- confirm against
 * the instruction table. */
#define VNOTSUPPORTED   0, 0
/* Pack a shader version into one integer: major in bits 8 and up, minor in
 * the low bits. */
#define V(maj, min)     (((maj) << 8) | (min))
1445
1446static inline const char *
1447d3dsio_to_string( unsigned opcode )
1448{
1449    static const char *names[] = {
1450        "NOP",
1451        "MOV",
1452        "ADD",
1453        "SUB",
1454        "MAD",
1455        "MUL",
1456        "RCP",
1457        "RSQ",
1458        "DP3",
1459        "DP4",
1460        "MIN",
1461        "MAX",
1462        "SLT",
1463        "SGE",
1464        "EXP",
1465        "LOG",
1466        "LIT",
1467        "DST",
1468        "LRP",
1469        "FRC",
1470        "M4x4",
1471        "M4x3",
1472        "M3x4",
1473        "M3x3",
1474        "M3x2",
1475        "CALL",
1476        "CALLNZ",
1477        "LOOP",
1478        "RET",
1479        "ENDLOOP",
1480        "LABEL",
1481        "DCL",
1482        "POW",
1483        "CRS",
1484        "SGN",
1485        "ABS",
1486        "NRM",
1487        "SINCOS",
1488        "REP",
1489        "ENDREP",
1490        "IF",
1491        "IFC",
1492        "ELSE",
1493        "ENDIF",
1494        "BREAK",
1495        "BREAKC",
1496        "MOVA",
1497        "DEFB",
1498        "DEFI",
1499        NULL,
1500        NULL,
1501        NULL,
1502        NULL,
1503        NULL,
1504        NULL,
1505        NULL,
1506        NULL,
1507        NULL,
1508        NULL,
1509        NULL,
1510        NULL,
1511        NULL,
1512        NULL,
1513        NULL,
1514        "TEXCOORD",
1515        "TEXKILL",
1516        "TEX",
1517        "TEXBEM",
1518        "TEXBEML",
1519        "TEXREG2AR",
1520        "TEXREG2GB",
1521        "TEXM3x2PAD",
1522        "TEXM3x2TEX",
1523        "TEXM3x3PAD",
1524        "TEXM3x3TEX",
1525        NULL,
1526        "TEXM3x3SPEC",
1527        "TEXM3x3VSPEC",
1528        "EXPP",
1529        "LOGP",
1530        "CND",
1531        "DEF",
1532        "TEXREG2RGB",
1533        "TEXDP3TEX",
1534        "TEXM3x2DEPTH",
1535        "TEXDP3",
1536        "TEXM3x3",
1537        "TEXDEPTH",
1538        "CMP",
1539        "BEM",
1540        "DP2ADD",
1541        "DSX",
1542        "DSY",
1543        "TEXLDD",
1544        "SETP",
1545        "TEXLDL",
1546        "BREAKP"
1547    };
1548
1549    if (opcode < ARRAY_SIZE(names)) return names[opcode];
1550
1551    switch (opcode) {
1552    case D3DSIO_PHASE: return "PHASE";
1553    case D3DSIO_COMMENT: return "COMMENT";
1554    case D3DSIO_END: return "END";
1555    default:
1556        return NULL;
1557    }
1558}
1559
/* All-zero initializer; judging by IS_VALID_INSTRUCTION below it describes
 * an instruction table entry with no supported version -- confirm against
 * the struct it initializes. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the four version bounds (vertex/fragment,
 * min/max) of inst is non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the translation function for instruction `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Header of the translation function for instruction `name`: a static
 * function returning HRESULT whose only parameter is the translator `tx`. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not in this part of the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1575
1576DECL_SPECIAL(NOP)
1577{
1578    /* Nothing to do. NOP was used to avoid hangs
1579     * with very old d3d drivers. */
1580    return D3D_OK;
1581}
1582
1583DECL_SPECIAL(SUB)
1584{
1585    struct ureg_program *ureg = tx->ureg;
1586    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1587    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1588    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1589
1590    ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1591    return D3D_OK;
1592}
1593
1594DECL_SPECIAL(ABS)
1595{
1596    struct ureg_program *ureg = tx->ureg;
1597    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1598    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1599
1600    ureg_MOV(ureg, dst, ureg_abs(src));
1601    return D3D_OK;
1602}
1603
1604DECL_SPECIAL(M4x4)
1605{
1606    return NineTranslateInstruction_Mkxn(tx, 4, 4);
1607}
1608
1609DECL_SPECIAL(M4x3)
1610{
1611    return NineTranslateInstruction_Mkxn(tx, 4, 3);
1612}
1613
1614DECL_SPECIAL(M3x4)
1615{
1616    return NineTranslateInstruction_Mkxn(tx, 3, 4);
1617}
1618
1619DECL_SPECIAL(M3x3)
1620{
1621    return NineTranslateInstruction_Mkxn(tx, 3, 3);
1622}
1623
1624DECL_SPECIAL(M3x2)
1625{
1626    return NineTranslateInstruction_Mkxn(tx, 3, 2);
1627}
1628
1629DECL_SPECIAL(CMP)
1630{
1631    ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1632             tx_src_param(tx, &tx->insn.src[0]),
1633             tx_src_param(tx, &tx->insn.src[2]),
1634             tx_src_param(tx, &tx->insn.src[1]));
1635    return D3D_OK;
1636}
1637
1638DECL_SPECIAL(CND)
1639{
1640    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1641    struct ureg_dst cgt;
1642    struct ureg_src cnd;
1643
1644    /* the coissue flag was a tip for compilers to advise to
1645     * execute two operations at the same time, in cases
1646     * the two executions had same dst with different channels.
1647     * It has no effect on current hw. However it seems CND
1648     * is affected. The handling of this very specific case
1649     * handled below mimick wine behaviour */
1650    if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1651        ureg_MOV(tx->ureg,
1652                 dst, tx_src_param(tx, &tx->insn.src[1]));
1653        return D3D_OK;
1654    }
1655
1656    cnd = tx_src_param(tx, &tx->insn.src[0]);
1657    cgt = tx_scratch(tx);
1658
1659    if (tx->version.major == 1 && tx->version.minor < 4)
1660        cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1661
1662    ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1663
1664    ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1665             tx_src_param(tx, &tx->insn.src[1]),
1666             tx_src_param(tx, &tx->insn.src[2]));
1667    return D3D_OK;
1668}
1669
1670DECL_SPECIAL(CALL)
1671{
1672    assert(tx->insn.src[0].idx < tx->num_inst_labels);
1673    ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1674    return D3D_OK;
1675}
1676
1677DECL_SPECIAL(CALLNZ)
1678{
1679    struct ureg_program *ureg = tx->ureg;
1680    struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1681
1682    if (!tx->native_integers)
1683        ureg_IF(ureg, src, tx_cond(tx));
1684    else
1685        ureg_UIF(ureg, src, tx_cond(tx));
1686    ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1687    tx_endcond(tx);
1688    ureg_ENDIF(ureg);
1689    return D3D_OK;
1690}
1691
1692DECL_SPECIAL(LOOP)
1693{
1694    struct ureg_program *ureg = tx->ureg;
1695    unsigned *label;
1696    struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1697    struct ureg_dst ctr;
1698    struct ureg_dst tmp;
1699    struct ureg_src ctrx;
1700
1701    label = tx_bgnloop(tx);
1702    ctr = tx_get_loopctr(tx, TRUE);
1703    ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1704
1705    /* src: num_iterations - start_value of al - step for al - 0 */
1706    ureg_MOV(ureg, ctr, src);
1707    ureg_BGNLOOP(tx->ureg, label);
1708    tmp = tx_scratch_scalar(tx);
1709    /* Initially ctr.x contains the number of iterations.
1710     * ctr.y will contain the updated value of al.
1711     * We decrease ctr.x at the end of every iteration,
1712     * and stop when it reaches 0. */
1713
1714    if (!tx->native_integers) {
1715        /* case src and ctr contain floats */
1716        /* to avoid precision issue, we stop when ctr <= 0.5 */
1717        ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1718        ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1719    } else {
1720        /* case src and ctr contain integers */
1721        ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1722        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1723    }
1724    ureg_BRK(ureg);
1725    tx_endcond(tx);
1726    ureg_ENDIF(ureg);
1727    return D3D_OK;
1728}
1729
1730DECL_SPECIAL(RET)
1731{
1732    ureg_RET(tx->ureg);
1733    return D3D_OK;
1734}
1735
1736DECL_SPECIAL(ENDLOOP)
1737{
1738    struct ureg_program *ureg = tx->ureg;
1739    struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1740    struct ureg_dst dst_ctrx, dst_al;
1741    struct ureg_src src_ctr, al_counter;
1742
1743    dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1744    dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1745    src_ctr = ureg_src(ctr);
1746    al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1747
1748    /* ctr.x -= 1
1749     * ctr.y (aL) += step */
1750    if (!tx->native_integers) {
1751        ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1752        ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1753    } else {
1754        ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1755        ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1756    }
1757    ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1758    return D3D_OK;
1759}
1760
1761DECL_SPECIAL(LABEL)
1762{
1763    unsigned k = tx->num_inst_labels;
1764    unsigned n = tx->insn.src[0].idx;
1765    assert(n < 2048);
1766    if (n >= k)
1767       tx->inst_labels = REALLOC(tx->inst_labels,
1768                                 k * sizeof(tx->inst_labels[0]),
1769                                 n * sizeof(tx->inst_labels[0]));
1770
1771    tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1772    return D3D_OK;
1773}
1774
1775DECL_SPECIAL(SINCOS)
1776{
1777    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1778    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1779
1780    assert(!(dst.WriteMask & 0xc));
1781
1782    dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1783    ureg_SCS(tx->ureg, dst, src);
1784    return D3D_OK;
1785}
1786
1787DECL_SPECIAL(SGN)
1788{
1789    ureg_SSG(tx->ureg,
1790             tx_dst_param(tx, &tx->insn.dst[0]),
1791             tx_src_param(tx, &tx->insn.src[0]));
1792    return D3D_OK;
1793}
1794
1795DECL_SPECIAL(REP)
1796{
1797    struct ureg_program *ureg = tx->ureg;
1798    unsigned *label;
1799    struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1800    struct ureg_dst ctr;
1801    struct ureg_dst tmp;
1802    struct ureg_src ctrx;
1803
1804    label = tx_bgnloop(tx);
1805    ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1806    ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1807
1808    /* NOTE: rep must be constant, so we don't have to save the count */
1809    assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1810
1811    /* rep: num_iterations - 0 - 0 - 0 */
1812    ureg_MOV(ureg, ctr, rep);
1813    ureg_BGNLOOP(ureg, label);
1814    tmp = tx_scratch_scalar(tx);
1815    /* Initially ctr.x contains the number of iterations.
1816     * We decrease ctr.x at the end of every iteration,
1817     * and stop when it reaches 0. */
1818
1819    if (!tx->native_integers) {
1820        /* case src and ctr contain floats */
1821        /* to avoid precision issue, we stop when ctr <= 0.5 */
1822        ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1823        ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1824    } else {
1825        /* case src and ctr contain integers */
1826        ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1827        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1828    }
1829    ureg_BRK(ureg);
1830    tx_endcond(tx);
1831    ureg_ENDIF(ureg);
1832
1833    return D3D_OK;
1834}
1835
1836DECL_SPECIAL(ENDREP)
1837{
1838    struct ureg_program *ureg = tx->ureg;
1839    struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1840    struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1841    struct ureg_src src_ctr = ureg_src(ctr);
1842
1843    /* ctr.x -= 1 */
1844    if (!tx->native_integers)
1845        ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1846    else
1847        ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1848
1849    ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1850    return D3D_OK;
1851}
1852
1853DECL_SPECIAL(ENDIF)
1854{
1855    tx_endcond(tx);
1856    ureg_ENDIF(tx->ureg);
1857    return D3D_OK;
1858}
1859
1860DECL_SPECIAL(IF)
1861{
1862    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1863
1864    if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1865        ureg_UIF(tx->ureg, src, tx_cond(tx));
1866    else
1867        ureg_IF(tx->ureg, src, tx_cond(tx));
1868
1869    return D3D_OK;
1870}
1871
1872static inline unsigned
1873sm1_insn_flags_to_tgsi_setop(BYTE flags)
1874{
1875    switch (flags) {
1876    case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1877    case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1878    case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1879    case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1880    case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1881    case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1882    default:
1883        assert(!"invalid comparison flags");
1884        return TGSI_OPCODE_SGT;
1885    }
1886}
1887
1888DECL_SPECIAL(IFC)
1889{
1890    const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1891    struct ureg_src src[2];
1892    struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1893    src[0] = tx_src_param(tx, &tx->insn.src[0]);
1894    src[1] = tx_src_param(tx, &tx->insn.src[1]);
1895    ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1896    ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1897    return D3D_OK;
1898}
1899
1900DECL_SPECIAL(ELSE)
1901{
1902    ureg_ELSE(tx->ureg, tx_elsecond(tx));
1903    return D3D_OK;
1904}
1905
1906DECL_SPECIAL(BREAKC)
1907{
1908    const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1909    struct ureg_src src[2];
1910    struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1911    src[0] = tx_src_param(tx, &tx->insn.src[0]);
1912    src[1] = tx_src_param(tx, &tx->insn.src[1]);
1913    ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1914    ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1915    ureg_BRK(tx->ureg);
1916    tx_endcond(tx);
1917    ureg_ENDIF(tx->ureg);
1918    return D3D_OK;
1919}
1920
/* Printable names of the D3DDECLUSAGE_* values, indexed by the usage
 * value itself. */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1938
1939static inline unsigned
1940sm1_to_nine_declusage(struct sm1_semantic *dcl)
1941{
1942    return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1943}
1944
1945static void
1946sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1947                      boolean tc,
1948                      struct sm1_semantic *dcl)
1949{
1950    BYTE index = dcl->usage_idx;
1951
1952    /* For everything that is not matching to a TGSI_SEMANTIC_****,
1953     * we match to a TGSI_SEMANTIC_GENERIC with index.
1954     *
1955     * The index can be anything UINT16 and usage_idx is BYTE,
1956     * so we can fit everything. It doesn't matter if indices
1957     * are close together or low.
1958     *
1959     *
1960     * POSITION >= 1: 10 * index + 6
1961     * COLOR >= 2: 10 * (index-1) + 7
1962     * TEXCOORD[0..15]: index
1963     * BLENDWEIGHT: 10 * index + 18
1964     * BLENDINDICES: 10 * index + 19
1965     * NORMAL: 10 * index + 20
1966     * TANGENT: 10 * index + 21
1967     * BINORMAL: 10 * index + 22
1968     * TESSFACTOR: 10 * index + 23
1969     */
1970
1971    switch (dcl->usage) {
1972    case D3DDECLUSAGE_POSITION:
1973    case D3DDECLUSAGE_POSITIONT:
1974    case D3DDECLUSAGE_DEPTH:
1975        if (index == 0) {
1976            sem->Name = TGSI_SEMANTIC_POSITION;
1977            sem->Index = 0;
1978        } else {
1979            sem->Name = TGSI_SEMANTIC_GENERIC;
1980            sem->Index = 10 * index + 6;
1981        }
1982        break;
1983    case D3DDECLUSAGE_COLOR:
1984        if (index < 2) {
1985            sem->Name = TGSI_SEMANTIC_COLOR;
1986            sem->Index = index;
1987        } else {
1988            sem->Name = TGSI_SEMANTIC_GENERIC;
1989            sem->Index = 10 * (index-1) + 7;
1990        }
1991        break;
1992    case D3DDECLUSAGE_FOG:
1993        assert(index == 0);
1994        sem->Name = TGSI_SEMANTIC_FOG;
1995        sem->Index = 0;
1996        break;
1997    case D3DDECLUSAGE_PSIZE:
1998        assert(index == 0);
1999        sem->Name = TGSI_SEMANTIC_PSIZE;
2000        sem->Index = 0;
2001        break;
2002    case D3DDECLUSAGE_TEXCOORD:
2003        assert(index < 16);
2004        if (index < 8 && tc)
2005            sem->Name = TGSI_SEMANTIC_TEXCOORD;
2006        else
2007            sem->Name = TGSI_SEMANTIC_GENERIC;
2008        sem->Index = index;
2009        break;
2010    case D3DDECLUSAGE_BLENDWEIGHT:
2011        sem->Name = TGSI_SEMANTIC_GENERIC;
2012        sem->Index = 10 * index + 18;
2013        break;
2014    case D3DDECLUSAGE_BLENDINDICES:
2015        sem->Name = TGSI_SEMANTIC_GENERIC;
2016        sem->Index = 10 * index + 19;
2017        break;
2018    case D3DDECLUSAGE_NORMAL:
2019        sem->Name = TGSI_SEMANTIC_GENERIC;
2020        sem->Index = 10 * index + 20;
2021        break;
2022    case D3DDECLUSAGE_TANGENT:
2023        sem->Name = TGSI_SEMANTIC_GENERIC;
2024        sem->Index = 10 * index + 21;
2025        break;
2026    case D3DDECLUSAGE_BINORMAL:
2027        sem->Name = TGSI_SEMANTIC_GENERIC;
2028        sem->Index = 10 * index + 22;
2029        break;
2030    case D3DDECLUSAGE_TESSFACTOR:
2031        sem->Name = TGSI_SEMANTIC_GENERIC;
2032        sem->Index = 10 * index + 23;
2033        break;
2034    case D3DDECLUSAGE_SAMPLE:
2035        sem->Name = TGSI_SEMANTIC_COUNT;
2036        sem->Index = 0;
2037        break;
2038    default:
2039        unreachable("Invalid DECLUSAGE.");
2040        break;
2041    }
2042}
2043
2044#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2045#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2046#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2047#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2048static inline unsigned
2049d3dstt_to_tgsi_tex(BYTE sampler_type)
2050{
2051    switch (sampler_type) {
2052    case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
2053    case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
2054    case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2055    case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
2056    default:
2057        assert(0);
2058        return TGSI_TEXTURE_UNKNOWN;
2059    }
2060}
2061static inline unsigned
2062d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2063{
2064    switch (sampler_type) {
2065    case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2066    case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2067    case NINED3DSTT_VOLUME:
2068    case NINED3DSTT_CUBE:
2069    default:
2070        assert(0);
2071        return TGSI_TEXTURE_UNKNOWN;
2072    }
2073}
2074static inline unsigned
2075ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2076{
2077    switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2078    case 1: return TGSI_TEXTURE_1D;
2079    case 0: return TGSI_TEXTURE_2D;
2080    case 3: return TGSI_TEXTURE_3D;
2081    default:
2082        return TGSI_TEXTURE_CUBE;
2083    }
2084}
2085
2086static const char *
2087sm1_sampler_type_name(BYTE sampler_type)
2088{
2089    switch (sampler_type) {
2090    case NINED3DSTT_1D:     return "1D";
2091    case NINED3DSTT_2D:     return "2D";
2092    case NINED3DSTT_VOLUME: return "VOLUME";
2093    case NINED3DSTT_CUBE:   return "CUBE";
2094    default:
2095        return "(D3DSTT_?)";
2096    }
2097}
2098
2099static inline unsigned
2100nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2101{
2102    switch (sem->Name) {
2103    case TGSI_SEMANTIC_POSITION:
2104    case TGSI_SEMANTIC_NORMAL:
2105        return TGSI_INTERPOLATE_LINEAR;
2106    case TGSI_SEMANTIC_BCOLOR:
2107    case TGSI_SEMANTIC_COLOR:
2108        return TGSI_INTERPOLATE_COLOR;
2109    case TGSI_SEMANTIC_FOG:
2110    case TGSI_SEMANTIC_GENERIC:
2111    case TGSI_SEMANTIC_TEXCOORD:
2112    case TGSI_SEMANTIC_CLIPDIST:
2113    case TGSI_SEMANTIC_CLIPVERTEX:
2114        return TGSI_INTERPOLATE_PERSPECTIVE;
2115    case TGSI_SEMANTIC_EDGEFLAG:
2116    case TGSI_SEMANTIC_FACE:
2117    case TGSI_SEMANTIC_INSTANCEID:
2118    case TGSI_SEMANTIC_PCOORD:
2119    case TGSI_SEMANTIC_PRIMID:
2120    case TGSI_SEMANTIC_PSIZE:
2121    case TGSI_SEMANTIC_VERTEXID:
2122        return TGSI_INTERPOLATE_CONSTANT;
2123    default:
2124        assert(0);
2125        return TGSI_INTERPOLATE_CONSTANT;
2126    }
2127}
2128
2129DECL_SPECIAL(DCL)
2130{
2131    struct ureg_program *ureg = tx->ureg;
2132    boolean is_input;
2133    boolean is_sampler;
2134    struct tgsi_declaration_semantic tgsi;
2135    struct sm1_semantic sem;
2136    sm1_read_semantic(tx, &sem);
2137
2138    is_input = sem.reg.file == D3DSPR_INPUT;
2139    is_sampler =
2140        sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2141
2142    DUMP("DCL ");
2143    sm1_dump_dst_param(&sem.reg);
2144    if (is_sampler)
2145        DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2146    else
2147    if (tx->version.major >= 3)
2148        DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2149    else
2150    if (sem.usage | sem.usage_idx)
2151        DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2152    else
2153        DUMP("\n");
2154
2155    if (is_sampler) {
2156        const unsigned m = 1 << sem.reg.idx;
2157        ureg_DECL_sampler(ureg, sem.reg.idx);
2158        tx->info->sampler_mask |= m;
2159        tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2160            d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2161            d3dstt_to_tgsi_tex(sem.sampler_type);
2162        return D3D_OK;
2163    }
2164
2165    sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2166    if (IS_VS) {
2167        if (is_input) {
2168            /* linkage outside of shader with vertex declaration */
2169            ureg_DECL_vs_input(ureg, sem.reg.idx);
2170            assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2171            tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2172            tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2173            /* NOTE: preserving order in case of indirect access */
2174        } else
2175        if (tx->version.major >= 3) {
2176            /* SM2 output semantic determined by file */
2177            assert(sem.reg.mask != 0);
2178            if (sem.usage == D3DDECLUSAGE_POSITIONT)
2179                tx->info->position_t = TRUE;
2180            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2181            assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2182            tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2183                ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2184            nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2185            if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2186                tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2187                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2188                tx->regs.oPos = tx->regs.o[sem.reg.idx];
2189            }
2190
2191            if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2192                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2193                tx->regs.oPts = tx->regs.o[sem.reg.idx];
2194            }
2195        }
2196    } else {
2197        if (is_input && tx->version.major >= 3) {
2198            unsigned interp_location = 0;
2199            /* SM3 only, SM2 input semantic determined by file */
2200            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2201            assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2202            /* PositionT and tessfactor forbidden */
2203            if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2204                return D3DERR_INVALIDCALL;
2205
2206            if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2207                /* Position0 is forbidden (likely because vPos already does that) */
2208                if (sem.usage == D3DDECLUSAGE_POSITION)
2209                    return D3DERR_INVALIDCALL;
2210                /* Following code is for depth */
2211                tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2212                return D3D_OK;
2213            }
2214
2215            if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2216                (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2217                interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2218
2219            tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2220                ureg, tgsi.Name, tgsi.Index,
2221                nine_tgsi_to_interp_mode(&tgsi),
2222                0, /* cylwrap */
2223                interp_location, 0, 1);
2224        } else
2225        if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2226            /* FragColor or FragDepth */
2227            assert(sem.reg.mask != 0);
2228            ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2229                                    0, 1);
2230        }
2231    }
2232    return D3D_OK;
2233}
2234
2235DECL_SPECIAL(DEF)
2236{
2237    tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2238    return D3D_OK;
2239}
2240
2241DECL_SPECIAL(DEFB)
2242{
2243    tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2244    return D3D_OK;
2245}
2246
2247DECL_SPECIAL(DEFI)
2248{
2249    tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2250    return D3D_OK;
2251}
2252
2253DECL_SPECIAL(POW)
2254{
2255    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2256    struct ureg_src src[2] = {
2257        tx_src_param(tx, &tx->insn.src[0]),
2258        tx_src_param(tx, &tx->insn.src[1])
2259    };
2260    ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2261    return D3D_OK;
2262}
2263
2264DECL_SPECIAL(RSQ)
2265{
2266    struct ureg_program *ureg = tx->ureg;
2267    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2268    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2269    struct ureg_dst tmp = tx_scratch(tx);
2270    ureg_RSQ(ureg, tmp, ureg_abs(src));
2271    ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2272    return D3D_OK;
2273}
2274
2275DECL_SPECIAL(LOG)
2276{
2277    struct ureg_program *ureg = tx->ureg;
2278    struct ureg_dst tmp = tx_scratch_scalar(tx);
2279    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2280    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2281    ureg_LG2(ureg, tmp, ureg_abs(src));
2282    ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2283    return D3D_OK;
2284}
2285
2286DECL_SPECIAL(LIT)
2287{
2288    struct ureg_program *ureg = tx->ureg;
2289    struct ureg_dst tmp = tx_scratch(tx);
2290    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2291    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2292    ureg_LIT(ureg, tmp, src);
2293    /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2294     * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2295     * it 0^0 if src.w=0, which value is driver dependent. */
2296    ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2297             ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2298             ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2299    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2300    return D3D_OK;
2301}
2302
2303DECL_SPECIAL(NRM)
2304{
2305    struct ureg_program *ureg = tx->ureg;
2306    struct ureg_dst tmp = tx_scratch_scalar(tx);
2307    struct ureg_src nrm = tx_src_scalar(tmp);
2308    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2309    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2310    ureg_DP3(ureg, tmp, src, src);
2311    ureg_RSQ(ureg, tmp, nrm);
2312    ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2313    ureg_MUL(ureg, dst, src, nrm);
2314    return D3D_OK;
2315}
2316
2317DECL_SPECIAL(DP2ADD)
2318{
2319    struct ureg_dst tmp = tx_scratch_scalar(tx);
2320    struct ureg_src dp2 = tx_src_scalar(tmp);
2321    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2322    struct ureg_src src[3];
2323    int i;
2324    for (i = 0; i < 3; ++i)
2325        src[i] = tx_src_param(tx, &tx->insn.src[i]);
2326    assert_replicate_swizzle(&src[2]);
2327
2328    ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2329    ureg_ADD(tx->ureg, dst, src[2], dp2);
2330
2331    return D3D_OK;
2332}
2333
2334DECL_SPECIAL(TEXCOORD)
2335{
2336    struct ureg_program *ureg = tx->ureg;
2337    const unsigned s = tx->insn.dst[0].idx;
2338    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2339
2340    tx_texcoord_alloc(tx, s);
2341    ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2342    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2343
2344    return D3D_OK;
2345}
2346
2347DECL_SPECIAL(TEXCOORD_ps14)
2348{
2349    struct ureg_program *ureg = tx->ureg;
2350    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2351    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2352
2353    assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2354
2355    ureg_MOV(ureg, dst, src);
2356
2357    return D3D_OK;
2358}
2359
2360DECL_SPECIAL(TEXKILL)
2361{
2362    struct ureg_src reg;
2363
2364    if (tx->version.major > 1 || tx->version.minor > 3) {
2365        reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2366    } else {
2367        tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2368        reg = tx->regs.vT[tx->insn.dst[0].idx];
2369    }
2370    if (tx->version.major < 2)
2371        reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2372    ureg_KILL_IF(tx->ureg, reg);
2373
2374    return D3D_OK;
2375}
2376
2377DECL_SPECIAL(TEXBEM)
2378{
2379    struct ureg_program *ureg = tx->ureg;
2380    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2381    struct ureg_dst tmp, tmp2, texcoord;
2382    struct ureg_src sample, m00, m01, m10, m11;
2383    struct ureg_src bumpenvlscale, bumpenvloffset;
2384    const int m = tx->insn.dst[0].idx;
2385    const int n = tx->insn.src[0].idx;
2386
2387    assert(tx->version.major == 1);
2388
2389    sample = ureg_DECL_sampler(ureg, m);
2390    tx->info->sampler_mask |= 1 << m;
2391
2392    tx_texcoord_alloc(tx, m);
2393
2394    tmp = tx_scratch(tx);
2395    tmp2 = tx_scratch(tx);
2396    texcoord = tx_scratch(tx);
2397    /*
2398     * Bump-env-matrix:
2399     * 00 is X
2400     * 01 is Y
2401     * 10 is Z
2402     * 11 is W
2403     */
2404    nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2405    m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2406    m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2407    m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2408    m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2409
2410    /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2411    if (m % 2 == 0) {
2412        bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2413        bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2414    } else {
2415        bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2416        bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2417    }
2418
2419    apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2420
2421    /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
2422    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2423             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2424    /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2425    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2426             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2427             NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2428
2429    /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2430    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2431             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2432    /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2433    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2434             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2435             NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2436
2437    /* Now the texture coordinates are in tmp.xy */
2438
2439    if (tx->insn.opcode == D3DSIO_TEXBEM) {
2440        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2441    } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2442        /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2443        ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2444        ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2445                 bumpenvlscale, bumpenvloffset);
2446        ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2447    }
2448
2449    tx->info->bumpenvmat_needed = 1;
2450
2451    return D3D_OK;
2452}
2453
2454DECL_SPECIAL(TEXREG2AR)
2455{
2456    struct ureg_program *ureg = tx->ureg;
2457    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2458    struct ureg_src sample;
2459    const int m = tx->insn.dst[0].idx;
2460    const int n = tx->insn.src[0].idx;
2461    assert(m >= 0 && m > n);
2462
2463    sample = ureg_DECL_sampler(ureg, m);
2464    tx->info->sampler_mask |= 1 << m;
2465    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2466
2467    return D3D_OK;
2468}
2469
2470DECL_SPECIAL(TEXREG2GB)
2471{
2472    struct ureg_program *ureg = tx->ureg;
2473    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2474    struct ureg_src sample;
2475    const int m = tx->insn.dst[0].idx;
2476    const int n = tx->insn.src[0].idx;
2477    assert(m >= 0 && m > n);
2478
2479    sample = ureg_DECL_sampler(ureg, m);
2480    tx->info->sampler_mask |= 1 << m;
2481    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2482
2483    return D3D_OK;
2484}
2485
2486DECL_SPECIAL(TEXM3x2PAD)
2487{
2488    return D3D_OK; /* this is just padding */
2489}
2490
2491DECL_SPECIAL(TEXM3x2TEX)
2492{
2493    struct ureg_program *ureg = tx->ureg;
2494    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2495    struct ureg_src sample;
2496    const int m = tx->insn.dst[0].idx - 1;
2497    const int n = tx->insn.src[0].idx;
2498    assert(m >= 0 && m > n);
2499
2500    tx_texcoord_alloc(tx, m);
2501    tx_texcoord_alloc(tx, m+1);
2502
2503    /* performs the matrix multiplication */
2504    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2505    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2506
2507    sample = ureg_DECL_sampler(ureg, m + 1);
2508    tx->info->sampler_mask |= 1 << (m + 1);
2509    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2510
2511    return D3D_OK;
2512}
2513
2514DECL_SPECIAL(TEXM3x3PAD)
2515{
2516    return D3D_OK; /* this is just padding */
2517}
2518
2519DECL_SPECIAL(TEXM3x3SPEC)
2520{
2521    struct ureg_program *ureg = tx->ureg;
2522    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2523    struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2524    struct ureg_src sample;
2525    struct ureg_dst tmp;
2526    const int m = tx->insn.dst[0].idx - 2;
2527    const int n = tx->insn.src[0].idx;
2528    assert(m >= 0 && m > n);
2529
2530    tx_texcoord_alloc(tx, m);
2531    tx_texcoord_alloc(tx, m+1);
2532    tx_texcoord_alloc(tx, m+2);
2533
2534    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2535    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2536    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2537
2538    sample = ureg_DECL_sampler(ureg, m + 2);
2539    tx->info->sampler_mask |= 1 << (m + 2);
2540    tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2541
2542    /* At this step, dst = N = (u', w', z').
2543     * We want dst to be the texture sampled at (u'', w'', z''), with
2544     * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2545    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2546    ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2547    /* at this step tmp.x = 1/N.N */
2548    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2549    /* at this step tmp.y = N.E */
2550    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2551    /* at this step tmp.x = N.E/N.N */
2552    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2553    ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2554    /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2555    ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2556    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2557
2558    return D3D_OK;
2559}
2560
2561DECL_SPECIAL(TEXREG2RGB)
2562{
2563    struct ureg_program *ureg = tx->ureg;
2564    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2565    struct ureg_src sample;
2566    const int m = tx->insn.dst[0].idx;
2567    const int n = tx->insn.src[0].idx;
2568    assert(m >= 0 && m > n);
2569
2570    sample = ureg_DECL_sampler(ureg, m);
2571    tx->info->sampler_mask |= 1 << m;
2572    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2573
2574    return D3D_OK;
2575}
2576
2577DECL_SPECIAL(TEXDP3TEX)
2578{
2579    struct ureg_program *ureg = tx->ureg;
2580    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2581    struct ureg_dst tmp;
2582    struct ureg_src sample;
2583    const int m = tx->insn.dst[0].idx;
2584    const int n = tx->insn.src[0].idx;
2585    assert(m >= 0 && m > n);
2586
2587    tx_texcoord_alloc(tx, m);
2588
2589    tmp = tx_scratch(tx);
2590    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2591    ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2592
2593    sample = ureg_DECL_sampler(ureg, m);
2594    tx->info->sampler_mask |= 1 << m;
2595    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2596
2597    return D3D_OK;
2598}
2599
2600DECL_SPECIAL(TEXM3x2DEPTH)
2601{
2602    struct ureg_program *ureg = tx->ureg;
2603    struct ureg_dst tmp;
2604    const int m = tx->insn.dst[0].idx - 1;
2605    const int n = tx->insn.src[0].idx;
2606    assert(m >= 0 && m > n);
2607
2608    tx_texcoord_alloc(tx, m);
2609    tx_texcoord_alloc(tx, m+1);
2610
2611    tmp = tx_scratch(tx);
2612
2613    /* performs the matrix multiplication */
2614    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2615    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2616
2617    ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2618    /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2619    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2620    /* res = 'w' == 0 ? 1.0 : z/w */
2621    ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2622             ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2623    /* replace the depth for depth testing with the result */
2624    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2625                                              TGSI_WRITEMASK_Z, 0, 1);
2626    ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2627    /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2628    return D3D_OK;
2629}
2630
2631DECL_SPECIAL(TEXDP3)
2632{
2633    struct ureg_program *ureg = tx->ureg;
2634    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2635    const int m = tx->insn.dst[0].idx;
2636    const int n = tx->insn.src[0].idx;
2637    assert(m >= 0 && m > n);
2638
2639    tx_texcoord_alloc(tx, m);
2640
2641    ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2642
2643    return D3D_OK;
2644}
2645
2646DECL_SPECIAL(TEXM3x3)
2647{
2648    struct ureg_program *ureg = tx->ureg;
2649    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2650    struct ureg_src sample;
2651    struct ureg_dst E, tmp;
2652    const int m = tx->insn.dst[0].idx - 2;
2653    const int n = tx->insn.src[0].idx;
2654    assert(m >= 0 && m > n);
2655
2656    tx_texcoord_alloc(tx, m);
2657    tx_texcoord_alloc(tx, m+1);
2658    tx_texcoord_alloc(tx, m+2);
2659
2660    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2661    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2662    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2663
2664    switch (tx->insn.opcode) {
2665    case D3DSIO_TEXM3x3:
2666        ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2667        break;
2668    case D3DSIO_TEXM3x3TEX:
2669        sample = ureg_DECL_sampler(ureg, m + 2);
2670        tx->info->sampler_mask |= 1 << (m + 2);
2671        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2672        break;
2673    case D3DSIO_TEXM3x3VSPEC:
2674        sample = ureg_DECL_sampler(ureg, m + 2);
2675        tx->info->sampler_mask |= 1 << (m + 2);
2676        E = tx_scratch(tx);
2677        tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2678        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2679        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2680        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2681        /* At this step, dst = N = (u', w', z').
2682         * We want dst to be the texture sampled at (u'', w'', z''), with
2683         * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2684        ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2685        ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2686        /* at this step tmp.x = 1/N.N */
2687        ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2688        /* at this step tmp.y = N.E */
2689        ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2690        /* at this step tmp.x = N.E/N.N */
2691        ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2692        ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2693        /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2694        ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2695        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2696        break;
2697    default:
2698        return D3DERR_INVALIDCALL;
2699    }
2700    return D3D_OK;
2701}
2702
2703DECL_SPECIAL(TEXDEPTH)
2704{
2705    struct ureg_program *ureg = tx->ureg;
2706    struct ureg_dst r5;
2707    struct ureg_src r5r, r5g;
2708
2709    assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2710
2711    /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2712     * r5 won't be used afterward, thus we can use r5.ba */
2713    r5 = tx->regs.r[5];
2714    r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2715    r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2716
2717    ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2718    ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2719    /* r5.r = r/g */
2720    ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2721             r5r, ureg_imm1f(ureg, 1.0f));
2722    /* replace the depth for depth testing with the result */
2723    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2724                                              TGSI_WRITEMASK_Z, 0, 1);
2725    ureg_MOV(ureg, tx->regs.oDepth, r5r);
2726
2727    return D3D_OK;
2728}
2729
2730DECL_SPECIAL(BEM)
2731{
2732    struct ureg_program *ureg = tx->ureg;
2733    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2734    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2735    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2736    struct ureg_src m00, m01, m10, m11;
2737    const int m = tx->insn.dst[0].idx;
2738    struct ureg_dst tmp;
2739    /*
2740     * Bump-env-matrix:
2741     * 00 is X
2742     * 01 is Y
2743     * 10 is Z
2744     * 11 is W
2745     */
2746    nine_info_mark_const_f_used(tx->info, 8 + m);
2747    m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2748    m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2749    m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2750    m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2751    /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
2752    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2753             NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2754    /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2755    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2756             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2757
2758    /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2759    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2760             NINE_APPLY_SWIZZLE(src1, X), src0);
2761    /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2762    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2763             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2764    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2765
2766    tx->info->bumpenvmat_needed = 1;
2767
2768    return D3D_OK;
2769}
2770
2771DECL_SPECIAL(TEXLD)
2772{
2773    struct ureg_program *ureg = tx->ureg;
2774    unsigned target;
2775    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2776    struct ureg_src src[2] = {
2777        tx_src_param(tx, &tx->insn.src[0]),
2778        tx_src_param(tx, &tx->insn.src[1])
2779    };
2780    assert(tx->insn.src[1].idx >= 0 &&
2781           tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2782    target = tx->sampler_targets[tx->insn.src[1].idx];
2783
2784    switch (tx->insn.flags) {
2785    case 0:
2786        ureg_TEX(ureg, dst, target, src[0], src[1]);
2787        break;
2788    case NINED3DSI_TEXLD_PROJECT:
2789        ureg_TXP(ureg, dst, target, src[0], src[1]);
2790        break;
2791    case NINED3DSI_TEXLD_BIAS:
2792        ureg_TXB(ureg, dst, target, src[0], src[1]);
2793        break;
2794    default:
2795        assert(0);
2796        return D3DERR_INVALIDCALL;
2797    }
2798    return D3D_OK;
2799}
2800
2801DECL_SPECIAL(TEXLD_14)
2802{
2803    struct ureg_program *ureg = tx->ureg;
2804    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2805    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2806    const unsigned s = tx->insn.dst[0].idx;
2807    const unsigned t = ps1x_sampler_type(tx->info, s);
2808
2809    tx->info->sampler_mask |= 1 << s;
2810    ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2811
2812    return D3D_OK;
2813}
2814
2815DECL_SPECIAL(TEX)
2816{
2817    struct ureg_program *ureg = tx->ureg;
2818    const unsigned s = tx->insn.dst[0].idx;
2819    const unsigned t = ps1x_sampler_type(tx->info, s);
2820    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2821    struct ureg_src src[2];
2822
2823    tx_texcoord_alloc(tx, s);
2824
2825    src[0] = tx->regs.vT[s];
2826    src[1] = ureg_DECL_sampler(ureg, s);
2827    tx->info->sampler_mask |= 1 << s;
2828
2829    TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2830
2831    return D3D_OK;
2832}
2833
2834DECL_SPECIAL(TEXLDD)
2835{
2836    unsigned target;
2837    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2838    struct ureg_src src[4] = {
2839        tx_src_param(tx, &tx->insn.src[0]),
2840        tx_src_param(tx, &tx->insn.src[1]),
2841        tx_src_param(tx, &tx->insn.src[2]),
2842        tx_src_param(tx, &tx->insn.src[3])
2843    };
2844    assert(tx->insn.src[1].idx >= 0 &&
2845           tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2846    target = tx->sampler_targets[tx->insn.src[1].idx];
2847
2848    ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2849    return D3D_OK;
2850}
2851
2852DECL_SPECIAL(TEXLDL)
2853{
2854    unsigned target;
2855    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2856    struct ureg_src src[2] = {
2857       tx_src_param(tx, &tx->insn.src[0]),
2858       tx_src_param(tx, &tx->insn.src[1])
2859    };
2860    assert(tx->insn.src[1].idx >= 0 &&
2861           tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2862    target = tx->sampler_targets[tx->insn.src[1].idx];
2863
2864    ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2865    return D3D_OK;
2866}
2867
/* setp is not implemented: the handler is a STUB that is passed
 * D3DERR_INVALIDCALL (presumably returned to the caller -- see the STUB
 * macro definition). */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2872
/* breakp is not implemented: the handler is a STUB that is passed
 * D3DERR_INVALIDCALL (presumably returned to the caller -- see the STUB
 * macro definition). */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2877
/* phase marker: emits nothing and always succeeds. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}
2882
/* comment token: emits nothing and always succeeds. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2887
2888
/* Build one positional sm1_op_info initializer.  Going by the parameter
 * names and by how the fields are consumed below, the arguments are:
 *   o        D3DSIO_* opcode suffix
 *   t        TGSI_OPCODE_* suffix used by the generic translation path
 *   vv1,vv2  first/last vertex shader version accepting the opcode
 *   pv1,pv2  first/last pixel shader version accepting the opcode
 *   d, s     number of destination / source parameters to read
 *   h        handler function, or NULL for the generic path
 * NOTE(review): the initializer is positional, so this mapping relies on the
 * field order of struct sm1_op_info -- confirm against its declaration.
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }

/* Opcode description table.  create_op_info_map() scans it in order and, for
 * every row whose version range contains the shader version, stores the row
 * index under the row's opcode; an opcode may therefore appear several times
 * with different version ranges (e.g. SINCOS, TEX, TEXCOORD, EXPP).
 */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    /* matrix multiplies */
    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    /* subroutines and loops */
    _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    /* DCL reads its own tokens in the handler, hence 0 dst / 0 src here */
    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    /* SINCOS takes 3 sources up to 2.1 and a single source in 3.0 */
    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    /* pixel-shader-only texture instructions (vertex range is V(0,0)..V(0,0)) */
    _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
2998
/* Stand-alone descriptor for D3DSIO_PHASE (pixel shader 1.4 only).  The
 * parser selects it directly when the opcode is outside op_info_map. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3001
/* Stand-alone descriptor for D3DSIO_COMMENT (all shader versions).  The
 * parser selects it directly when the opcode is outside op_info_map. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3004
3005static void
3006create_op_info_map(struct shader_translator *tx)
3007{
3008    const unsigned version = (tx->version.major << 8) | tx->version.minor;
3009    unsigned i;
3010
3011    for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3012        tx->op_info_map[i] = -1;
3013
3014    if (tx->processor == PIPE_SHADER_VERTEX) {
3015        for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3016            assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3017            if (inst_table[i].vert_version.min <= version &&
3018                inst_table[i].vert_version.max >= version)
3019                tx->op_info_map[inst_table[i].sio] = i;
3020        }
3021    } else {
3022        for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3023            assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3024            if (inst_table[i].frag_version.min <= version &&
3025                inst_table[i].frag_version.max >= version)
3026                tx->op_info_map[inst_table[i].sio] = i;
3027        }
3028    }
3029}
3030
3031static inline HRESULT
3032NineTranslateInstruction_Generic(struct shader_translator *tx)
3033{
3034    struct ureg_dst dst[1];
3035    struct ureg_src src[4];
3036    unsigned i;
3037
3038    for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3039        dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3040    for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3041        src[i] = tx_src_param(tx, &tx->insn.src[i]);
3042
3043    ureg_insn(tx->ureg, tx->insn.info->opcode,
3044              dst, tx->insn.ndst,
3045              src, tx->insn.nsrc);
3046    return D3D_OK;
3047}
3048
3049static inline DWORD
3050TOKEN_PEEK(struct shader_translator *tx)
3051{
3052    return *(tx->parse);
3053}
3054
3055static inline DWORD
3056TOKEN_NEXT(struct shader_translator *tx)
3057{
3058    return *(tx->parse)++;
3059}
3060
3061static inline void
3062TOKEN_JUMP(struct shader_translator *tx)
3063{
3064    if (tx->parse_next && tx->parse != tx->parse_next) {
3065        WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3066        tx->parse = tx->parse_next;
3067    }
3068}
3069
3070static inline boolean
3071sm1_parse_eof(struct shader_translator *tx)
3072{
3073    return TOKEN_PEEK(tx) == NINED3DSP_END;
3074}
3075
3076static void
3077sm1_read_version(struct shader_translator *tx)
3078{
3079    const DWORD tok = TOKEN_NEXT(tx);
3080
3081    tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3082    tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3083
3084    switch (tok >> 16) {
3085    case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3086    case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3087    default:
3088       DBG("Invalid shader type: %x\n", tok);
3089       tx->processor = ~0;
3090       break;
3091    }
3092}
3093
3094/* This is just to check if we parsed the instruction properly. */
3095static void
3096sm1_parse_get_skip(struct shader_translator *tx)
3097{
3098    const DWORD tok = TOKEN_PEEK(tx);
3099
3100    if (tx->version.major >= 2) {
3101        tx->parse_next = tx->parse + 1 /* this */ +
3102            ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3103    } else {
3104        tx->parse_next = NULL; /* TODO: determine from param count */
3105    }
3106}
3107
3108static void
3109sm1_print_comment(const char *comment, UINT size)
3110{
3111    if (!size)
3112        return;
3113    /* TODO */
3114}
3115
3116static void
3117sm1_parse_comments(struct shader_translator *tx, BOOL print)
3118{
3119    DWORD tok = TOKEN_PEEK(tx);
3120
3121    while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3122    {
3123        const char *comment = "";
3124        UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3125        tx->parse += size + 1;
3126
3127        if (print)
3128            sm1_print_comment(comment, size);
3129
3130        tok = TOKEN_PEEK(tx);
3131    }
3132}
3133
3134static void
3135sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3136{
3137    *reg = TOKEN_NEXT(tx);
3138
3139    if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3140    {
3141        if (tx->version.major < 2)
3142            *rel = (1 << 31) |
3143                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3144                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
3145                D3DSP_NOSWIZZLE;
3146        else
3147            *rel = TOKEN_NEXT(tx);
3148    }
3149}
3150
3151static void
3152sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3153{
3154    int8_t shift;
3155    dst->file =
3156        (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
3157        (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3158    dst->type = TGSI_RETURN_TYPE_FLOAT;
3159    dst->idx = tok & D3DSP_REGNUM_MASK;
3160    dst->rel = NULL;
3161    dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3162    dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3163    shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3164    dst->shift = (shift & 0x7) - (shift & 0x8);
3165}
3166
3167static void
3168sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3169{
3170    src->file =
3171        ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
3172        ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3173    src->type = TGSI_RETURN_TYPE_FLOAT;
3174    src->idx = tok & D3DSP_REGNUM_MASK;
3175    src->rel = NULL;
3176    src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3177    src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3178
3179    switch (src->file) {
3180    case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3181    case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3182    case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3183    default:
3184        break;
3185    }
3186}
3187
3188static void
3189sm1_parse_immediate(struct shader_translator *tx,
3190                    struct sm1_src_param *imm)
3191{
3192    imm->file = NINED3DSPR_IMMEDIATE;
3193    imm->idx = INT_MIN;
3194    imm->rel = NULL;
3195    imm->swizzle = NINED3DSP_NOSWIZZLE;
3196    imm->mod = 0;
3197    switch (tx->insn.opcode) {
3198    case D3DSIO_DEF:
3199        imm->type = NINED3DSPTYPE_FLOAT4;
3200        memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3201        tx->parse += 4;
3202        break;
3203    case D3DSIO_DEFI:
3204        imm->type = NINED3DSPTYPE_INT4;
3205        memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3206        tx->parse += 4;
3207        break;
3208    case D3DSIO_DEFB:
3209        imm->type = NINED3DSPTYPE_BOOL;
3210        memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3211        tx->parse += 1;
3212        break;
3213    default:
3214       assert(0);
3215       break;
3216    }
3217}
3218
3219static void
3220sm1_read_dst_param(struct shader_translator *tx,
3221                   struct sm1_dst_param *dst,
3222                   struct sm1_src_param *rel)
3223{
3224    DWORD tok_dst, tok_rel = 0;
3225
3226    sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3227    sm1_parse_dst_param(dst, tok_dst);
3228    if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3229        sm1_parse_src_param(rel, tok_rel);
3230        dst->rel = rel;
3231    }
3232}
3233
3234static void
3235sm1_read_src_param(struct shader_translator *tx,
3236                   struct sm1_src_param *src,
3237                   struct sm1_src_param *rel)
3238{
3239    DWORD tok_src, tok_rel = 0;
3240
3241    sm1_parse_get_param(tx, &tok_src, &tok_rel);
3242    sm1_parse_src_param(src, tok_src);
3243    if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3244        assert(rel);
3245        sm1_parse_src_param(rel, tok_rel);
3246        src->rel = rel;
3247    }
3248}
3249
3250static void
3251sm1_read_semantic(struct shader_translator *tx,
3252                  struct sm1_semantic *sem)
3253{
3254    const DWORD tok_usg = TOKEN_NEXT(tx);
3255    const DWORD tok_dst = TOKEN_NEXT(tx);
3256
3257    sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3258    sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3259    sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3260
3261    sm1_parse_dst_param(&sem->reg, tok_dst);
3262}
3263
/* Parse one instruction at the parse position and translate it.
 *
 * Steps: skip leading comments, record where the next instruction should
 * start, decode the instruction token, look up its sm1_op_info, read the
 * destination / source parameters the table entry asks for, then run the
 * entry's handler (or the generic translation when it has none).
 *
 * A handler result other than D3D_OK sets tx->failure.  An unknown opcode or
 * a version mismatch is only reported through DBG; tx->failure is not set
 * on those paths.
 */
static void
sm1_parse_instruction(struct shader_translator *tx)
{
    struct sm1_instruction *insn = &tx->insn;
    HRESULT hr;
    DWORD tok;
    struct sm1_op_info *info = NULL;
    unsigned i;

    sm1_parse_comments(tx, TRUE);
    sm1_parse_get_skip(tx);

    tok = TOKEN_NEXT(tx);

    /* split the instruction token into opcode, flags and marker bits */
    insn->opcode = tok & D3DSI_OPCODE_MASK;
    insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
    insn->coissue = !!(tok & D3DSI_COISSUE);
    insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);

    /* opcodes covered by op_info_map resolve through the per-version map;
     * PHASE and COMMENT have stand-alone descriptors */
    if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
        int k = tx->op_info_map[insn->opcode];
        if (k >= 0) {
            assert(k < ARRAY_SIZE(inst_table));
            info = &inst_table[k];
        }
    } else {
       if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
       if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
    }
    if (!info) {
       DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
       /* resynchronise with the next instruction when its start is known */
       TOKEN_JUMP(tx);
       return;
    }
    insn->info = info;
    insn->ndst = info->ndst;
    insn->nsrc = info->nsrc;

    assert(!insn->predicated && "TODO: predicated instructions");

    /* check version */
    {
        unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
        unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
        unsigned ver = (tx->version.major << 8) | tx->version.minor;
        if (ver < min || ver > max) {
            DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
                min, ver, max);
            /* NOTE(review): unlike the unknown-opcode path above, this
             * return does not call TOKEN_JUMP -- confirm that is intended. */
            return;
        }
    }

    /* token order: destinations, then the predicate, then sources */
    for (i = 0; i < insn->ndst; ++i)
        sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
    if (insn->predicated)
        sm1_read_src_param(tx, &insn->pred, NULL);
    for (i = 0; i < insn->nsrc; ++i)
        sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);

    /* parse here so we can dump them before processing */
    if (insn->opcode == D3DSIO_DEF ||
        insn->opcode == D3DSIO_DEFI ||
        insn->opcode == D3DSIO_DEFB)
        sm1_parse_immediate(tx, &tx->insn.src[0]);

    sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
    sm1_instruction_check(insn);

    /* a NULL handler means the table's TGSI opcode can be emitted directly */
    if (info->handler)
        hr = info->handler(tx);
    else
        hr = NineTranslateInstruction_Generic(tx);
    /* runs regardless of the handler's result */
    tx_apply_dst0_modifiers(tx);

    if (hr != D3D_OK)
        tx->failure = TRUE;
    tx->num_scratch = 0; /* reset */

    /* resynchronise with the precomputed start of the next instruction */
    TOKEN_JUMP(tx);
}
3344
3345static void
3346tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3347{
3348    unsigned i;
3349
3350    tx->info = info;
3351
3352    tx->byte_code = info->byte_code;
3353    tx->parse = info->byte_code;
3354
3355    for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3356        info->input_map[i] = NINE_DECLUSAGE_NONE;
3357    info->num_inputs = 0;
3358
3359    info->position_t = FALSE;
3360    info->point_size = FALSE;
3361
3362    tx->info->const_float_slots = 0;
3363    tx->info->const_int_slots = 0;
3364    tx->info->const_bool_slots = 0;
3365
3366    info->sampler_mask = 0x0;
3367    info->rt_mask = 0x0;
3368
3369    info->lconstf.data = NULL;
3370    info->lconstf.ranges = NULL;
3371
3372    info->bumpenvmat_needed = 0;
3373
3374    for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3375        tx->regs.rL[i] = ureg_dst_undef();
3376    }
3377    tx->regs.address = ureg_dst_undef();
3378    tx->regs.a0 = ureg_dst_undef();
3379    tx->regs.p = ureg_dst_undef();
3380    tx->regs.oDepth = ureg_dst_undef();
3381    tx->regs.vPos = ureg_src_undef();
3382    tx->regs.vFace = ureg_src_undef();
3383    for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3384        tx->regs.o[i] = ureg_dst_undef();
3385    for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3386        tx->regs.oCol[i] = ureg_dst_undef();
3387    for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3388        tx->regs.vC[i] = ureg_src_undef();
3389    for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3390        tx->regs.vT[i] = ureg_src_undef();
3391
3392    sm1_read_version(tx);
3393
3394    info->version = (tx->version.major << 4) | tx->version.minor;
3395
3396    tx->num_outputs = 0;
3397
3398    create_op_info_map(tx);
3399}
3400
3401static void
3402tx_dtor(struct shader_translator *tx)
3403{
3404    if (tx->num_inst_labels)
3405        FREE(tx->inst_labels);
3406    FREE(tx->lconstf);
3407    FREE(tx->regs.r);
3408    FREE(tx);
3409}
3410
/* Append the position write-out used when vertices are processed in
 * software.  Intended constant layout for the (currently disabled)
 * viewport transform:
 * CONST[0].xyz = width/2, -height/2, zmax-zmin
 * CONST[1].xyz = x+width/2, y+height/2, zmin
 *
 * As written, the function only copies oPos.xyz to oPos_out.xyz; the
 * scale/offset by c0/c1 is kept in the comment below.
 */
static void
shader_add_vs_viewport_transform(struct shader_translator *tx)
{
    struct ureg_program *ureg = tx->ureg;
    struct ureg_src c0 = NINE_CONSTANT_SRC(0);
    struct ureg_src c1 = NINE_CONSTANT_SRC(1);
    /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/

    /* c0/c1 are redirected to dimension 4 but are not used by any
     * instruction emitted below */
    c0 = ureg_src_dimension(c0, 4);
    c1 = ureg_src_dimension(c1, 4);
    /* TODO: find out when we need to apply the viewport transformation or not.
     * Likely will be XYZ vs XYZRHW in vdecl_out
     * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
     * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
     */
    ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
}
3430
3431static void
3432shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3433{
3434    struct ureg_program *ureg = tx->ureg;
3435    struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3436    struct ureg_src fog_end, fog_coeff, fog_density;
3437    struct ureg_src fog_vs, depth, fog_color;
3438    struct ureg_dst fog_factor;
3439
3440    if (!tx->info->fog_enable) {
3441        ureg_MOV(ureg, oCol0, src_col);
3442        return;
3443    }
3444
3445    if (tx->info->fog_mode != D3DFOG_NONE) {
3446        depth = nine_get_position_input(tx);
3447        depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3448    }
3449
3450    nine_info_mark_const_f_used(tx->info, 33);
3451    fog_color = NINE_CONSTANT_SRC(32);
3452    fog_factor = tx_scratch_scalar(tx);
3453
3454    if (tx->info->fog_mode == D3DFOG_LINEAR) {
3455        fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3456        fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3457        ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
3458        ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3459    } else if (tx->info->fog_mode == D3DFOG_EXP) {
3460        fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3461        ureg_MUL(ureg, fog_factor, depth, fog_density);
3462        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3463        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3464    } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3465        fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3466        ureg_MUL(ureg, fog_factor, depth, fog_density);
3467        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3468        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3469        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3470    } else {
3471        fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3472                                            TGSI_INTERPOLATE_PERSPECTIVE),
3473                                            TGSI_SWIZZLE_X);
3474        ureg_MOV(ureg, fog_factor, fog_vs);
3475    }
3476
3477    ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3478             tx_src_scalar(fog_factor), src_col, fog_color);
3479    ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3480}
3481
/* Query a screen-wide capability PIPE_CAP_<n>.  Expands to a call through a
 * variable named `screen`, which must be in scope at the point of use. */
#define GET_CAP(n) screen->get_param( \
      screen, PIPE_CAP_##n)
/* Query a per-stage capability PIPE_SHADER_CAP_<n> for the stage given by
 * info->type.  Requires `screen` and `info` in scope at the point of use. */
#define GET_SHADER_CAP(n) screen->get_shader_param( \
      screen, info->type, PIPE_SHADER_CAP_##n)
3486
3487HRESULT
3488nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3489{
3490    struct shader_translator *tx;
3491    HRESULT hr = D3D_OK;
3492    const unsigned processor = info->type;
3493    struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3494
3495    user_assert(processor != ~0, D3DERR_INVALIDCALL);
3496
3497    tx = CALLOC_STRUCT(shader_translator);
3498    if (!tx)
3499        return E_OUTOFMEMORY;
3500    tx_ctor(tx, info);
3501
3502    if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3503        hr = D3DERR_INVALIDCALL;
3504        DBG("Unsupported shader version: %u.%u !\n",
3505            tx->version.major, tx->version.minor);
3506        goto out;
3507    }
3508    if (tx->processor != processor) {
3509        hr = D3DERR_INVALIDCALL;
3510        DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3511        goto out;
3512    }
3513    DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3514         tx->version.major, tx->version.minor);
3515
3516    tx->ureg = ureg_create(processor);
3517    if (!tx->ureg) {
3518        hr = E_OUTOFMEMORY;
3519        goto out;
3520    }
3521
3522    tx->native_integers = GET_SHADER_CAP(INTEGERS);
3523    tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3524    tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3525    tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3526    tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3527    tx->texcoord_sn = tx->want_texcoord ?
3528        TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3529    tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3530    tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3531
3532    if (IS_VS) {
3533        tx->num_constf_allowed = NINE_MAX_CONST_F;
3534    } else if (tx->version.major < 2) {/* IS_PS v1 */
3535        tx->num_constf_allowed = 8;
3536    } else if (tx->version.major == 2) {/* IS_PS v2 */
3537        tx->num_constf_allowed = 32;
3538    } else {/* IS_PS v3 */
3539        tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3540    }
3541
3542    if (tx->version.major < 2) {
3543        tx->num_consti_allowed = 0;
3544        tx->num_constb_allowed = 0;
3545    } else {
3546        tx->num_consti_allowed = NINE_MAX_CONST_I;
3547        tx->num_constb_allowed = NINE_MAX_CONST_B;
3548    }
3549
3550    if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
3551        tx->num_constf_allowed = 8192;
3552        tx->num_consti_allowed = 2048;
3553        tx->num_constb_allowed = 2048;
3554    }
3555
3556    /* VS must always write position. Declare it here to make it the 1st output.
3557     * (Some drivers like nv50 are buggy and rely on that.)
3558     */
3559    if (IS_VS) {
3560        tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3561    } else {
3562        ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3563        if (!tx->shift_wpos)
3564            ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3565    }
3566
3567    while (!sm1_parse_eof(tx) && !tx->failure)
3568        sm1_parse_instruction(tx);
3569    tx->parse++; /* for byte_size */
3570
3571    if (tx->failure) {
3572        /* For VS shaders, we print the warning later,
3573         * we first try with swvp. */
3574        if (IS_PS)
3575            ERR("Encountered buggy shader\n");
3576        ureg_destroy(tx->ureg);
3577        hr = D3DERR_INVALIDCALL;
3578        goto out;
3579    }
3580
3581    if (IS_PS && tx->version.major < 3) {
3582        if (tx->version.major < 2) {
3583            assert(tx->num_temp); /* there must be color output */
3584            info->rt_mask |= 0x1;
3585            shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3586        } else {
3587            shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3588        }
3589    }
3590
3591    if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3592        tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3593        ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3594    }
3595
3596    if (info->position_t)
3597        ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3598
3599    if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3600        struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3601        ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3602        ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3603        info->point_size = TRUE;
3604    }
3605
3606    if (info->process_vertices)
3607        shader_add_vs_viewport_transform(tx);
3608
3609    ureg_END(tx->ureg);
3610
3611    /* record local constants */
3612    if (tx->num_lconstf && tx->indirect_const_access) {
3613        struct nine_range *ranges;
3614        float *data;
3615        int *indices;
3616        unsigned i, k, n;
3617
3618        hr = E_OUTOFMEMORY;
3619
3620        data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3621        if (!data)
3622            goto out;
3623        info->lconstf.data = data;
3624
3625        indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3626        if (!indices)
3627            goto out;
3628
3629        /* lazy sort, num_lconstf should be small */
3630        for (n = 0; n < tx->num_lconstf; ++n) {
3631            for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3632                if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3633                    k = i;
3634            }
3635            indices[n] = tx->lconstf[k].idx;
3636            memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3637            tx->lconstf[k].idx = INT_MAX;
3638        }
3639
3640        /* count ranges */
3641        for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3642            if (indices[i] != indices[i - 1] + 1)
3643                ++n;
3644        ranges = MALLOC(n * sizeof(ranges[0]));
3645        if (!ranges) {
3646            FREE(indices);
3647            goto out;
3648        }
3649        info->lconstf.ranges = ranges;
3650
3651        k = 0;
3652        ranges[k].bgn = indices[0];
3653        for (i = 1; i < tx->num_lconstf; ++i) {
3654            if (indices[i] != indices[i - 1] + 1) {
3655                ranges[k].next = &ranges[k + 1];
3656                ranges[k].end = indices[i - 1] + 1;
3657                ++k;
3658                ranges[k].bgn = indices[i];
3659            }
3660        }
3661        ranges[k].end = indices[i - 1] + 1;
3662        ranges[k].next = NULL;
3663        assert(n == (k + 1));
3664
3665        FREE(indices);
3666        hr = D3D_OK;
3667    }
3668
3669    /* r500 */
3670    if (info->const_float_slots > device->max_vs_const_f &&
3671        (info->const_int_slots || info->const_bool_slots) &&
3672        (!IS_VS || !info->swvp_on))
3673        ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3674
3675
3676    if (tx->indirect_const_access) /* vs only */
3677        info->const_float_slots = device->max_vs_const_f;
3678
3679    if (!IS_VS || !info->swvp_on) {
3680        unsigned s, slot_max;
3681        unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3682
3683        slot_max = info->const_bool_slots > 0 ?
3684                       max_const_f + NINE_MAX_CONST_I
3685                       + DIV_ROUND_UP(info->const_bool_slots, 4) :
3686                           info->const_int_slots > 0 ?
3687                               max_const_f + info->const_int_slots :
3688                                   info->const_float_slots;
3689
3690        info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3691
3692        for (s = 0; s < slot_max; s++)
3693            ureg_DECL_constant(tx->ureg, s);
3694    } else {
3695         ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3696         ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3697         ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3698         ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3699    }
3700
3701    if (info->process_vertices)
3702        ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3703
3704    if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3705        unsigned count;
3706        const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3707        tgsi_dump(toks, 0);
3708        ureg_free_tokens(toks);
3709    }
3710
3711    if (info->process_vertices) {
3712        NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3713                                                    tx->output_info,
3714                                                    tx->num_outputs,
3715                                                    &(info->so));
3716        info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3717    } else
3718        info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3719    if (!info->cso) {
3720        hr = D3DERR_DRIVERINTERNALERROR;
3721        FREE(info->lconstf.data);
3722        FREE(info->lconstf.ranges);
3723        goto out;
3724    }
3725
3726    info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3727out:
3728    tx_dtor(tx);
3729    return hr;
3730}
3731