1/**************************************************************************
2 *
3 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* This file contains the vertex shader transformations for SW TCL needed
 * to overcome the limitations of the r300 rasterizer.
 *
 * Transformations:
 * 1) If the secondary color output is present, the primary color must be
 *    present too.
 * 2) If any back-face color output is present, all 4 color outputs must
 *    be present, so the missing ones are inserted.
 * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
 *
 * I know this code is cumbersome, but I don't know of any nicer way
 * of transforming TGSI shaders. ~ M.
 */
40
41#include "r300_vs.h"
42
43#include <stdio.h>
44
45#include "tgsi/tgsi_transform.h"
46#include "tgsi/tgsi_dump.h"
47
48#include "draw/draw_context.h"
49
50struct vs_transform_context {
51    struct tgsi_transform_context base;
52
53    boolean color_used[2];
54    boolean bcolor_used[2];
55
56    /* Index of the pos output, typically 0. */
57    unsigned pos_output;
58    /* Index of the pos temp where all writes of pos are redirected to. */
59    unsigned pos_temp;
60    /* The index of the last generic output, after which we insert a new
61     * output for WPOS. */
62    int last_generic;
63
64    unsigned num_outputs;
65    /* Used to shift output decl. indices when inserting new ones. */
66    unsigned decl_shift;
67    /* Used to remap writes to output decls if their indices changed. */
68    unsigned out_remap[32];
69
70    /* First instruction processed? */
71    boolean first_instruction;
72    /* End instruction processed? */
73    boolean end_instruction;
74
75    boolean temp_used[1024];
76};
77
78static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
79{
80    struct tgsi_full_declaration decl;
81
82    decl = tgsi_default_full_declaration();
83    decl.Declaration.File = TGSI_FILE_TEMPORARY;
84    decl.Range.First = decl.Range.Last = reg;
85    ctx->emit_declaration(ctx, &decl);
86}
87
88static void emit_output(struct tgsi_transform_context *ctx,
89                        unsigned name, unsigned index, unsigned interp,
90                        unsigned reg)
91{
92    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
93    struct tgsi_full_declaration decl;
94
95    decl = tgsi_default_full_declaration();
96    decl.Declaration.File = TGSI_FILE_OUTPUT;
97    decl.Declaration.Interpolate = 1;
98    decl.Declaration.Semantic = TRUE;
99    decl.Semantic.Name = name;
100    decl.Semantic.Index = index;
101    decl.Range.First = decl.Range.Last = reg;
102    decl.Interp.Interpolate = interp;
103    ctx->emit_declaration(ctx, &decl);
104    ++vsctx->num_outputs;
105}
106
107static void insert_output_before(struct tgsi_transform_context *ctx,
108                                 struct tgsi_full_declaration *before,
109                                 unsigned name, unsigned index, unsigned interp)
110{
111    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
112    unsigned i;
113
114    /* Make a place for the new output. */
115    for (i = before->Range.First; i < Elements(vsctx->out_remap); i++) {
116        ++vsctx->out_remap[i];
117    }
118
119    /* Insert the new output. */
120    emit_output(ctx, name, index, interp,
121                before->Range.First + vsctx->decl_shift);
122
123    ++vsctx->decl_shift;
124}
125
126static void insert_output_after(struct tgsi_transform_context *ctx,
127                                struct tgsi_full_declaration *after,
128                                unsigned name, unsigned index, unsigned interp)
129{
130    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
131    unsigned i;
132
133    /* Make a place for the new output. */
134    for (i = after->Range.First+1; i < Elements(vsctx->out_remap); i++) {
135        ++vsctx->out_remap[i];
136    }
137
138    /* Insert the new output. */
139    emit_output(ctx, name, index, interp,
140                after->Range.First + 1);
141
142    ++vsctx->decl_shift;
143}
144
145static void transform_decl(struct tgsi_transform_context *ctx,
146                           struct tgsi_full_declaration *decl)
147{
148    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
149    unsigned i;
150
151    if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
152        switch (decl->Semantic.Name) {
153            case TGSI_SEMANTIC_POSITION:
154                vsctx->pos_output = decl->Range.First;
155                break;
156
157            case TGSI_SEMANTIC_COLOR:
158                assert(decl->Semantic.Index < 2);
159
160                /* We must rasterize the first color if the second one is
161                 * used, otherwise the rasterizer doesn't do the color
162                 * selection correctly. Declare it, but don't write to it. */
163                if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
164                    insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
165                                         TGSI_INTERPOLATE_LINEAR);
166                    vsctx->color_used[0] = TRUE;
167                }
168                break;
169
170            case TGSI_SEMANTIC_BCOLOR:
171                assert(decl->Semantic.Index < 2);
172
173                /* We must rasterize all 4 colors if back-face colors are
174                 * used, otherwise the rasterizer doesn't do the color
175                 * selection correctly. Declare it, but don't write to it. */
176                if (!vsctx->color_used[0]) {
177                    insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
178                                         TGSI_INTERPOLATE_LINEAR);
179                    vsctx->color_used[0] = TRUE;
180                }
181                if (!vsctx->color_used[1]) {
182                    insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
183                                         TGSI_INTERPOLATE_LINEAR);
184                    vsctx->color_used[1] = TRUE;
185                }
186                if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
187                    insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
188                                         TGSI_INTERPOLATE_LINEAR);
189                    vsctx->bcolor_used[0] = TRUE;
190                }
191                break;
192
193            case TGSI_SEMANTIC_GENERIC:
194                vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index);
195                break;
196        }
197
198        /* Since we're inserting new outputs in between, the following outputs
199         * should be moved to the right so that they don't overlap with
200         * the newly added ones. */
201        decl->Range.First += vsctx->decl_shift;
202        decl->Range.Last += vsctx->decl_shift;
203
204        ++vsctx->num_outputs;
205    } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
206        for (i = decl->Range.First; i <= decl->Range.Last; i++) {
207           vsctx->temp_used[i] = TRUE;
208        }
209    }
210
211    ctx->emit_declaration(ctx, decl);
212
213    /* Insert BCOLOR1 if needed. */
214    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
215        decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR &&
216        !vsctx->bcolor_used[1]) {
217        insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1,
218                            TGSI_INTERPOLATE_LINEAR);
219    }
220}
221
222static void transform_inst(struct tgsi_transform_context *ctx,
223                           struct tgsi_full_instruction *inst)
224{
225    struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx;
226    struct tgsi_full_instruction new_inst;
227    unsigned i;
228
229    if (!vsctx->first_instruction) {
230        vsctx->first_instruction = TRUE;
231
232        /* Insert the generic output for WPOS. */
233        emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
234                    TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
235
236        /* Find a free temp for POSITION. */
237        for (i = 0; i < Elements(vsctx->temp_used); i++) {
238            if (!vsctx->temp_used[i]) {
239                emit_temp(ctx, i);
240                vsctx->pos_temp = i;
241                break;
242            }
243        }
244    }
245
246    if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
247        /* MOV OUT[pos_output], TEMP[pos_temp]; */
248        new_inst = tgsi_default_full_instruction();
249        new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
250        new_inst.Instruction.NumDstRegs = 1;
251        new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
252        new_inst.Dst[0].Register.Index = vsctx->pos_output;
253        new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
254        new_inst.Instruction.NumSrcRegs = 1;
255        new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
256        new_inst.Src[0].Register.Index = vsctx->pos_temp;
257        ctx->emit_instruction(ctx, &new_inst);
258
259        /* MOV OUT[n-1], TEMP[pos_temp]; */
260        new_inst = tgsi_default_full_instruction();
261        new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
262        new_inst.Instruction.NumDstRegs = 1;
263        new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
264        new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1;
265        new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
266        new_inst.Instruction.NumSrcRegs = 1;
267        new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
268        new_inst.Src[0].Register.Index = vsctx->pos_temp;
269        ctx->emit_instruction(ctx, &new_inst);
270
271        vsctx->end_instruction = TRUE;
272    } else {
273        /* Not an END instruction. */
274        /* Fix writes to outputs. */
275        for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
276            struct tgsi_full_dst_register *dst = &inst->Dst[i];
277            if (dst->Register.File == TGSI_FILE_OUTPUT) {
278                if (dst->Register.Index == vsctx->pos_output) {
279                    /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */
280                    dst->Register.File = TGSI_FILE_TEMPORARY;
281                    dst->Register.Index = vsctx->pos_temp;
282                } else {
283                    /* Not a position, good...
284                     * Since we were changing the indices of output decls,
285                     * we must redirect writes into them too. */
286                    dst->Register.Index = vsctx->out_remap[dst->Register.Index];
287                }
288            }
289        }
290
291        /* Inserting 2 instructions before the END opcode moves all following
292         * labels by 2. Subroutines are always after the END opcode so
293         * they're always moved. */
294        if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) {
295            inst->Label.Label += 2;
296        }
297        /* The labels of the following opcodes are moved only after
298         * the END opcode. */
299        if (vsctx->end_instruction &&
300            (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
301             inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
302             inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP ||
303             inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) {
304            inst->Label.Label += 2;
305        }
306    }
307
308    ctx->emit_instruction(ctx, inst);
309}
310
311void r300_draw_init_vertex_shader(struct r300_context *r300,
312                                  struct r300_vertex_shader *vs)
313{
314    struct draw_context *draw = r300->draw;
315    struct pipe_shader_state new_vs;
316    struct tgsi_shader_info info;
317    struct vs_transform_context transform;
318    const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
319    unsigned i;
320
321    tgsi_scan_shader(vs->state.tokens, &info);
322
323    new_vs.tokens = tgsi_alloc_tokens(newLen);
324    if (new_vs.tokens == NULL)
325        return;
326
327    memset(&transform, 0, sizeof(transform));
328    for (i = 0; i < Elements(transform.out_remap); i++) {
329        transform.out_remap[i] = i;
330    }
331    transform.last_generic = -1;
332    transform.base.transform_instruction = transform_inst;
333    transform.base.transform_declaration = transform_decl;
334
335    for (i = 0; i < info.num_outputs; i++) {
336        unsigned index = info.output_semantic_index[i];
337
338        switch (info.output_semantic_name[i]) {
339            case TGSI_SEMANTIC_COLOR:
340                assert(index < 2);
341                transform.color_used[index] = TRUE;
342                break;
343
344            case TGSI_SEMANTIC_BCOLOR:
345                assert(index < 2);
346                transform.bcolor_used[index] = TRUE;
347                break;
348        }
349    }
350
351    tgsi_transform_shader(vs->state.tokens,
352                          (struct tgsi_token*)new_vs.tokens,
353                          newLen, &transform.base);
354
355#if 0
356    printf("----------------------------------------------\norig shader:\n");
357    tgsi_dump(vs->state.tokens, 0);
358    printf("----------------------------------------------\nnew shader:\n");
359    tgsi_dump(new_vs.tokens, 0);
360    printf("----------------------------------------------\n");
361#endif
362
363    /* Free old tokens. */
364    FREE((void*)vs->state.tokens);
365
366    vs->draw_vs = draw_create_vertex_shader(draw, &new_vs);
367
368    /* Instead of duplicating and freeing the tokens, copy the pointer directly. */
369    vs->state.tokens = new_vs.tokens;
370
371    /* Init the VS output table for the rasterizer. */
372    r300_init_vs_outputs(r300, vs);
373
374    /* Make the last generic be WPOS. */
375    vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1];
376    vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED;
377}
378