sp_quad_fs.c revision c7daa68ca312cc98abe351be2fef8d8246929627
1344356a0edee932604027386591c82f6666e607cBrian/**************************************************************************
2344356a0edee932604027386591c82f6666e607cBrian *
3344356a0edee932604027386591c82f6666e607cBrian * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4344356a0edee932604027386591c82f6666e607cBrian * All Rights Reserved.
5344356a0edee932604027386591c82f6666e607cBrian *
6344356a0edee932604027386591c82f6666e607cBrian * Permission is hereby granted, free of charge, to any person obtaining a
7344356a0edee932604027386591c82f6666e607cBrian * copy of this software and associated documentation files (the
8344356a0edee932604027386591c82f6666e607cBrian * "Software"), to deal in the Software without restriction, including
9344356a0edee932604027386591c82f6666e607cBrian * without limitation the rights to use, copy, modify, merge, publish,
10344356a0edee932604027386591c82f6666e607cBrian * distribute, sub license, and/or sell copies of the Software, and to
11344356a0edee932604027386591c82f6666e607cBrian * permit persons to whom the Software is furnished to do so, subject to
12344356a0edee932604027386591c82f6666e607cBrian * the following conditions:
13344356a0edee932604027386591c82f6666e607cBrian *
14344356a0edee932604027386591c82f6666e607cBrian * The above copyright notice and this permission notice (including the
15344356a0edee932604027386591c82f6666e607cBrian * next paragraph) shall be included in all copies or substantial portions
16344356a0edee932604027386591c82f6666e607cBrian * of the Software.
17344356a0edee932604027386591c82f6666e607cBrian *
18344356a0edee932604027386591c82f6666e607cBrian * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19344356a0edee932604027386591c82f6666e607cBrian * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20344356a0edee932604027386591c82f6666e607cBrian * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21344356a0edee932604027386591c82f6666e607cBrian * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22344356a0edee932604027386591c82f6666e607cBrian * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23344356a0edee932604027386591c82f6666e607cBrian * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24344356a0edee932604027386591c82f6666e607cBrian * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25344356a0edee932604027386591c82f6666e607cBrian *
26344356a0edee932604027386591c82f6666e607cBrian **************************************************************************/
27344356a0edee932604027386591c82f6666e607cBrian
28344356a0edee932604027386591c82f6666e607cBrian/* Vertices are just an array of floats, with all the attributes
29344356a0edee932604027386591c82f6666e607cBrian * packed.  We currently assume a layout like:
30344356a0edee932604027386591c82f6666e607cBrian *
31344356a0edee932604027386591c82f6666e607cBrian * attr[0][0..3] - window position
32344356a0edee932604027386591c82f6666e607cBrian * attr[1..n][0..3] - remaining attributes.
33344356a0edee932604027386591c82f6666e607cBrian *
34344356a0edee932604027386591c82f6666e607cBrian * Attributes are assumed to be 4 floats wide but are packed so that
35344356a0edee932604027386591c82f6666e607cBrian * all the enabled attributes run contiguously.
36344356a0edee932604027386591c82f6666e607cBrian */
37344356a0edee932604027386591c82f6666e607cBrian
38344356a0edee932604027386591c82f6666e607cBrian#include "pipe/p_util.h"
39344356a0edee932604027386591c82f6666e607cBrian#include "pipe/p_defines.h"
40344356a0edee932604027386591c82f6666e607cBrian#include "pipe/p_shader_tokens.h"
41344356a0edee932604027386591c82f6666e607cBrian
42344356a0edee932604027386591c82f6666e607cBrian#include "sp_context.h"
43344356a0edee932604027386591c82f6666e607cBrian#include "sp_state.h"
44344356a0edee932604027386591c82f6666e607cBrian#include "sp_headers.h"
45344356a0edee932604027386591c82f6666e607cBrian#include "sp_quad.h"
46344356a0edee932604027386591c82f6666e607cBrian#include "sp_texture.h"
47344356a0edee932604027386591c82f6666e607cBrian#include "sp_tex_sample.h"
48344356a0edee932604027386591c82f6666e607cBrian
49344356a0edee932604027386591c82f6666e607cBrian
50344356a0edee932604027386591c82f6666e607cBrianstruct quad_shade_stage
51344356a0edee932604027386591c82f6666e607cBrian{
52344356a0edee932604027386591c82f6666e607cBrian   struct quad_stage stage;
53344356a0edee932604027386591c82f6666e607cBrian   struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
54344356a0edee932604027386591c82f6666e607cBrian   struct tgsi_exec_machine machine;
55344356a0edee932604027386591c82f6666e607cBrian   struct tgsi_exec_vector *inputs, *outputs;
56344356a0edee932604027386591c82f6666e607cBrian   int colorOutSlot, depthOutSlot;
57344356a0edee932604027386591c82f6666e607cBrian};
58344356a0edee932604027386591c82f6666e607cBrian
59344356a0edee932604027386591c82f6666e607cBrian
60344356a0edee932604027386591c82f6666e607cBrian/** cast wrapper */
61344356a0edee932604027386591c82f6666e607cBrianstatic INLINE struct quad_shade_stage *
62344356a0edee932604027386591c82f6666e607cBrianquad_shade_stage(struct quad_stage *qs)
63344356a0edee932604027386591c82f6666e607cBrian{
64344356a0edee932604027386591c82f6666e607cBrian   return (struct quad_shade_stage *) qs;
65344356a0edee932604027386591c82f6666e607cBrian}
66344356a0edee932604027386591c82f6666e607cBrian
67344356a0edee932604027386591c82f6666e607cBrian
68344356a0edee932604027386591c82f6666e607cBrian
69344356a0edee932604027386591c82f6666e607cBrian/**
70344356a0edee932604027386591c82f6666e607cBrian * Execute fragment shader for the four fragments in the quad.
71344356a0edee932604027386591c82f6666e607cBrian */
72344356a0edee932604027386591c82f6666e607cBrianstatic void
73344356a0edee932604027386591c82f6666e607cBrianshade_quad(
74344356a0edee932604027386591c82f6666e607cBrian   struct quad_stage *qs,
75344356a0edee932604027386591c82f6666e607cBrian   struct quad_header *quad )
76344356a0edee932604027386591c82f6666e607cBrian{
77344356a0edee932604027386591c82f6666e607cBrian   struct quad_shade_stage *qss = quad_shade_stage( qs );
78344356a0edee932604027386591c82f6666e607cBrian   struct softpipe_context *softpipe = qs->softpipe;
79344356a0edee932604027386591c82f6666e607cBrian   struct tgsi_exec_machine *machine = &qss->machine;
80344356a0edee932604027386591c82f6666e607cBrian
81344356a0edee932604027386591c82f6666e607cBrian   /* Consts do not require 16 byte alignment. */
82344356a0edee932604027386591c82f6666e607cBrian   machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
83344356a0edee932604027386591c82f6666e607cBrian
84344356a0edee932604027386591c82f6666e607cBrian   machine->InterpCoefs = quad->coef;
85344356a0edee932604027386591c82f6666e607cBrian
86344356a0edee932604027386591c82f6666e607cBrian   /* run shader */
87344356a0edee932604027386591c82f6666e607cBrian   quad->mask &= softpipe->fs->run( softpipe->fs,
88344356a0edee932604027386591c82f6666e607cBrian				    &qss->machine,
89344356a0edee932604027386591c82f6666e607cBrian				    quad );
90344356a0edee932604027386591c82f6666e607cBrian
91c7daa68ca312cc98abe351be2fef8d8246929627Brian#if 0 /* XXX multi color outputs - untested */
92c7daa68ca312cc98abe351be2fef8d8246929627Brian   /* store outputs */
93c7daa68ca312cc98abe351be2fef8d8246929627Brian   boolean z_written = FALSE;
94c7daa68ca312cc98abe351be2fef8d8246929627Brian   {
95c7daa68ca312cc98abe351be2fef8d8246929627Brian      const ubyte *sem_name = softpipe->fs->info.output_semantic_name;
96c7daa68ca312cc98abe351be2fef8d8246929627Brian      const ubyte *sem_index = softpipe->fs->info.output_semantic_index;
97c7daa68ca312cc98abe351be2fef8d8246929627Brian      const uint n = qss->stage.softpipe->fs->info.num_outputs;
98c7daa68ca312cc98abe351be2fef8d8246929627Brian      uint i;
99c7daa68ca312cc98abe351be2fef8d8246929627Brian      for (i = 0; i < n; i++) {
100c7daa68ca312cc98abe351be2fef8d8246929627Brian         switch (sem_name[i]) {
101c7daa68ca312cc98abe351be2fef8d8246929627Brian         case TGSI_SEMANTIC_COLOR:
102c7daa68ca312cc98abe351be2fef8d8246929627Brian            {
103c7daa68ca312cc98abe351be2fef8d8246929627Brian               uint cbuf = sem_index[i];
104c7daa68ca312cc98abe351be2fef8d8246929627Brian               memcpy(quad->outputs.color[cbuf],
105c7daa68ca312cc98abe351be2fef8d8246929627Brian                      &machine->Outputs[i].xyzw[0].f[0],
106c7daa68ca312cc98abe351be2fef8d8246929627Brian                      sizeof(quad->outputs.color[0]) );
107c7daa68ca312cc98abe351be2fef8d8246929627Brian            }
108c7daa68ca312cc98abe351be2fef8d8246929627Brian            break;
109c7daa68ca312cc98abe351be2fef8d8246929627Brian         case TGSI_SEMANTIC_POSITION:
110c7daa68ca312cc98abe351be2fef8d8246929627Brian            {
111c7daa68ca312cc98abe351be2fef8d8246929627Brian               uint j;
112c7daa68ca312cc98abe351be2fef8d8246929627Brian               for (j = 0; j < 4; j++) {
113c7daa68ca312cc98abe351be2fef8d8246929627Brian                  quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j];
114c7daa68ca312cc98abe351be2fef8d8246929627Brian               }
115c7daa68ca312cc98abe351be2fef8d8246929627Brian               z_written = TRUE;
116c7daa68ca312cc98abe351be2fef8d8246929627Brian            }
117c7daa68ca312cc98abe351be2fef8d8246929627Brian            break;
118c7daa68ca312cc98abe351be2fef8d8246929627Brian         }
119c7daa68ca312cc98abe351be2fef8d8246929627Brian      }
120c7daa68ca312cc98abe351be2fef8d8246929627Brian   }
121c7daa68ca312cc98abe351be2fef8d8246929627Brian
122c7daa68ca312cc98abe351be2fef8d8246929627Brian   if (!z_written) {
123c7daa68ca312cc98abe351be2fef8d8246929627Brian      /* compute Z values now, as in the quad earlyz stage */
124c7daa68ca312cc98abe351be2fef8d8246929627Brian      /* XXX we should really only do this if the earlyz stage is not used */
125c7daa68ca312cc98abe351be2fef8d8246929627Brian      const float fx = (float) quad->x0;
126c7daa68ca312cc98abe351be2fef8d8246929627Brian      const float fy = (float) quad->y0;
127c7daa68ca312cc98abe351be2fef8d8246929627Brian      const float dzdx = quad->posCoef->dadx[2];
128c7daa68ca312cc98abe351be2fef8d8246929627Brian      const float dzdy = quad->posCoef->dady[2];
129c7daa68ca312cc98abe351be2fef8d8246929627Brian      const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
130c7daa68ca312cc98abe351be2fef8d8246929627Brian
131c7daa68ca312cc98abe351be2fef8d8246929627Brian      quad->outputs.depth[0] = z0;
132c7daa68ca312cc98abe351be2fef8d8246929627Brian      quad->outputs.depth[1] = z0 + dzdx;
133c7daa68ca312cc98abe351be2fef8d8246929627Brian      quad->outputs.depth[2] = z0 + dzdy;
134c7daa68ca312cc98abe351be2fef8d8246929627Brian      quad->outputs.depth[3] = z0 + dzdx + dzdy;
135c7daa68ca312cc98abe351be2fef8d8246929627Brian   }
136c7daa68ca312cc98abe351be2fef8d8246929627Brian#endif
137c7daa68ca312cc98abe351be2fef8d8246929627Brian
138c7daa68ca312cc98abe351be2fef8d8246929627Brian   /* store result color(s) */
139344356a0edee932604027386591c82f6666e607cBrian   if (qss->colorOutSlot >= 0) {
140344356a0edee932604027386591c82f6666e607cBrian      /* XXX need to handle multiple color outputs someday */
141c7daa68ca312cc98abe351be2fef8d8246929627Brian      assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
142344356a0edee932604027386591c82f6666e607cBrian             == TGSI_SEMANTIC_COLOR);
143344356a0edee932604027386591c82f6666e607cBrian      memcpy(
144c7daa68ca312cc98abe351be2fef8d8246929627Brian             quad->outputs.color[0],
145344356a0edee932604027386591c82f6666e607cBrian             &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
146c7daa68ca312cc98abe351be2fef8d8246929627Brian             sizeof( quad->outputs.color[0] ) );
147344356a0edee932604027386591c82f6666e607cBrian   }
148344356a0edee932604027386591c82f6666e607cBrian
149344356a0edee932604027386591c82f6666e607cBrian   /* store result Z */
150344356a0edee932604027386591c82f6666e607cBrian   if (qss->depthOutSlot >= 0) {
151344356a0edee932604027386591c82f6666e607cBrian      /* output[slot] is new Z */
152344356a0edee932604027386591c82f6666e607cBrian      uint i;
153344356a0edee932604027386591c82f6666e607cBrian      for (i = 0; i < 4; i++) {
154344356a0edee932604027386591c82f6666e607cBrian         quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i];
155344356a0edee932604027386591c82f6666e607cBrian      }
156344356a0edee932604027386591c82f6666e607cBrian   }
157344356a0edee932604027386591c82f6666e607cBrian   else {
158cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      /* compute Z values now, as in the quad earlyz stage */
159cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      /* XXX we should really only do this if the earlyz stage is not used */
160cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      const float fx = (float) quad->x0;
161cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      const float fy = (float) quad->y0;
162cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      const float dzdx = quad->posCoef->dadx[2];
163cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      const float dzdy = quad->posCoef->dady[2];
164cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
165cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian
166cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      quad->outputs.depth[0] = z0;
167cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      quad->outputs.depth[1] = z0 + dzdx;
168cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      quad->outputs.depth[2] = z0 + dzdy;
169cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian      quad->outputs.depth[3] = z0 + dzdx + dzdy;
170344356a0edee932604027386591c82f6666e607cBrian   }
171344356a0edee932604027386591c82f6666e607cBrian
172344356a0edee932604027386591c82f6666e607cBrian   /* shader may cull fragments */
173344356a0edee932604027386591c82f6666e607cBrian   if( quad->mask ) {
174344356a0edee932604027386591c82f6666e607cBrian      qs->next->run( qs->next, quad );
175344356a0edee932604027386591c82f6666e607cBrian   }
176344356a0edee932604027386591c82f6666e607cBrian}
177344356a0edee932604027386591c82f6666e607cBrian
178344356a0edee932604027386591c82f6666e607cBrian/**
179344356a0edee932604027386591c82f6666e607cBrian * Per-primitive (or per-begin?) setup
180344356a0edee932604027386591c82f6666e607cBrian */
181344356a0edee932604027386591c82f6666e607cBrianstatic void shade_begin(struct quad_stage *qs)
182344356a0edee932604027386591c82f6666e607cBrian{
183344356a0edee932604027386591c82f6666e607cBrian   struct quad_shade_stage *qss = quad_shade_stage(qs);
184344356a0edee932604027386591c82f6666e607cBrian   struct softpipe_context *softpipe = qs->softpipe;
185344356a0edee932604027386591c82f6666e607cBrian   unsigned i;
186344356a0edee932604027386591c82f6666e607cBrian   unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers);
187344356a0edee932604027386591c82f6666e607cBrian
188344356a0edee932604027386591c82f6666e607cBrian   /* set TGSI sampler state that varies */
189344356a0edee932604027386591c82f6666e607cBrian   for (i = 0; i < num; i++) {
190344356a0edee932604027386591c82f6666e607cBrian      qss->samplers[i].state = softpipe->sampler[i];
191344356a0edee932604027386591c82f6666e607cBrian      qss->samplers[i].texture = softpipe->texture[i];
192344356a0edee932604027386591c82f6666e607cBrian   }
193344356a0edee932604027386591c82f6666e607cBrian
194344356a0edee932604027386591c82f6666e607cBrian   /* find output slots for depth, color */
195344356a0edee932604027386591c82f6666e607cBrian   qss->colorOutSlot = -1;
196344356a0edee932604027386591c82f6666e607cBrian   qss->depthOutSlot = -1;
197344356a0edee932604027386591c82f6666e607cBrian   for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) {
198344356a0edee932604027386591c82f6666e607cBrian      switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) {
199344356a0edee932604027386591c82f6666e607cBrian      case TGSI_SEMANTIC_POSITION:
200344356a0edee932604027386591c82f6666e607cBrian         qss->depthOutSlot = i;
201344356a0edee932604027386591c82f6666e607cBrian         break;
202344356a0edee932604027386591c82f6666e607cBrian      case TGSI_SEMANTIC_COLOR:
203344356a0edee932604027386591c82f6666e607cBrian         qss->colorOutSlot = i;
204344356a0edee932604027386591c82f6666e607cBrian         break;
205344356a0edee932604027386591c82f6666e607cBrian      }
206344356a0edee932604027386591c82f6666e607cBrian   }
207344356a0edee932604027386591c82f6666e607cBrian
208344356a0edee932604027386591c82f6666e607cBrian   softpipe->fs->prepare( softpipe->fs,
209344356a0edee932604027386591c82f6666e607cBrian			  &qss->machine,
210344356a0edee932604027386591c82f6666e607cBrian			  qss->samplers );
211344356a0edee932604027386591c82f6666e607cBrian
212344356a0edee932604027386591c82f6666e607cBrian   qs->next->begin(qs->next);
213344356a0edee932604027386591c82f6666e607cBrian}
214344356a0edee932604027386591c82f6666e607cBrian
215344356a0edee932604027386591c82f6666e607cBrian
216344356a0edee932604027386591c82f6666e607cBrianstatic void shade_destroy(struct quad_stage *qs)
217344356a0edee932604027386591c82f6666e607cBrian{
218344356a0edee932604027386591c82f6666e607cBrian   struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
219344356a0edee932604027386591c82f6666e607cBrian
220344356a0edee932604027386591c82f6666e607cBrian   tgsi_exec_machine_free_data(&qss->machine);
221344356a0edee932604027386591c82f6666e607cBrian   FREE( qss->inputs );
222344356a0edee932604027386591c82f6666e607cBrian   FREE( qss->outputs );
223344356a0edee932604027386591c82f6666e607cBrian   FREE( qs );
224344356a0edee932604027386591c82f6666e607cBrian}
225344356a0edee932604027386591c82f6666e607cBrian
226344356a0edee932604027386591c82f6666e607cBrian
227344356a0edee932604027386591c82f6666e607cBrianstruct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
228344356a0edee932604027386591c82f6666e607cBrian{
229344356a0edee932604027386591c82f6666e607cBrian   struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
230344356a0edee932604027386591c82f6666e607cBrian   uint i;
231344356a0edee932604027386591c82f6666e607cBrian
232344356a0edee932604027386591c82f6666e607cBrian   /* allocate storage for program inputs/outputs, aligned to 16 bytes */
23339038c11699bbc9baab744542e96d54e91cb452aBrian   qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
23439038c11699bbc9baab744542e96d54e91cb452aBrian   qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16);
235344356a0edee932604027386591c82f6666e607cBrian   qss->machine.Inputs = align16(qss->inputs);
236344356a0edee932604027386591c82f6666e607cBrian   qss->machine.Outputs = align16(qss->outputs);
237344356a0edee932604027386591c82f6666e607cBrian
238344356a0edee932604027386591c82f6666e607cBrian   qss->stage.softpipe = softpipe;
239344356a0edee932604027386591c82f6666e607cBrian   qss->stage.begin = shade_begin;
240344356a0edee932604027386591c82f6666e607cBrian   qss->stage.run = shade_quad;
241344356a0edee932604027386591c82f6666e607cBrian   qss->stage.destroy = shade_destroy;
242344356a0edee932604027386591c82f6666e607cBrian
243344356a0edee932604027386591c82f6666e607cBrian   /* set TGSI sampler state that's constant */
244344356a0edee932604027386591c82f6666e607cBrian   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
245344356a0edee932604027386591c82f6666e607cBrian      assert(softpipe->tex_cache[i]);
246344356a0edee932604027386591c82f6666e607cBrian      qss->samplers[i].get_samples = sp_get_samples;
247344356a0edee932604027386591c82f6666e607cBrian      qss->samplers[i].pipe = &softpipe->pipe;
248344356a0edee932604027386591c82f6666e607cBrian      qss->samplers[i].cache = softpipe->tex_cache[i];
249344356a0edee932604027386591c82f6666e607cBrian   }
250344356a0edee932604027386591c82f6666e607cBrian
251344356a0edee932604027386591c82f6666e607cBrian   tgsi_exec_machine_init( &qss->machine );
252344356a0edee932604027386591c82f6666e607cBrian
253344356a0edee932604027386591c82f6666e607cBrian   return &qss->stage;
254344356a0edee932604027386591c82f6666e607cBrian}
255