/* sp_quad_fs.c revision c7daa68ca312cc98abe351be2fef8d8246929627 */
1344356a0edee932604027386591c82f6666e607cBrian/************************************************************************** 2344356a0edee932604027386591c82f6666e607cBrian * 3344356a0edee932604027386591c82f6666e607cBrian * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4344356a0edee932604027386591c82f6666e607cBrian * All Rights Reserved. 5344356a0edee932604027386591c82f6666e607cBrian * 6344356a0edee932604027386591c82f6666e607cBrian * Permission is hereby granted, free of charge, to any person obtaining a 7344356a0edee932604027386591c82f6666e607cBrian * copy of this software and associated documentation files (the 8344356a0edee932604027386591c82f6666e607cBrian * "Software"), to deal in the Software without restriction, including 9344356a0edee932604027386591c82f6666e607cBrian * without limitation the rights to use, copy, modify, merge, publish, 10344356a0edee932604027386591c82f6666e607cBrian * distribute, sub license, and/or sell copies of the Software, and to 11344356a0edee932604027386591c82f6666e607cBrian * permit persons to whom the Software is furnished to do so, subject to 12344356a0edee932604027386591c82f6666e607cBrian * the following conditions: 13344356a0edee932604027386591c82f6666e607cBrian * 14344356a0edee932604027386591c82f6666e607cBrian * The above copyright notice and this permission notice (including the 15344356a0edee932604027386591c82f6666e607cBrian * next paragraph) shall be included in all copies or substantial portions 16344356a0edee932604027386591c82f6666e607cBrian * of the Software. 17344356a0edee932604027386591c82f6666e607cBrian * 18344356a0edee932604027386591c82f6666e607cBrian * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19344356a0edee932604027386591c82f6666e607cBrian * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20344356a0edee932604027386591c82f6666e607cBrian * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
21344356a0edee932604027386591c82f6666e607cBrian * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22344356a0edee932604027386591c82f6666e607cBrian * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23344356a0edee932604027386591c82f6666e607cBrian * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24344356a0edee932604027386591c82f6666e607cBrian * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25344356a0edee932604027386591c82f6666e607cBrian * 26344356a0edee932604027386591c82f6666e607cBrian **************************************************************************/ 27344356a0edee932604027386591c82f6666e607cBrian 28344356a0edee932604027386591c82f6666e607cBrian/* Vertices are just an array of floats, with all the attributes 29344356a0edee932604027386591c82f6666e607cBrian * packed. We currently assume a layout like: 30344356a0edee932604027386591c82f6666e607cBrian * 31344356a0edee932604027386591c82f6666e607cBrian * attr[0][0..3] - window position 32344356a0edee932604027386591c82f6666e607cBrian * attr[1..n][0..3] - remaining attributes. 33344356a0edee932604027386591c82f6666e607cBrian * 34344356a0edee932604027386591c82f6666e607cBrian * Attributes are assumed to be 4 floats wide but are packed so that 35344356a0edee932604027386591c82f6666e607cBrian * all the enabled attributes run contiguously. 
36344356a0edee932604027386591c82f6666e607cBrian */ 37344356a0edee932604027386591c82f6666e607cBrian 38344356a0edee932604027386591c82f6666e607cBrian#include "pipe/p_util.h" 39344356a0edee932604027386591c82f6666e607cBrian#include "pipe/p_defines.h" 40344356a0edee932604027386591c82f6666e607cBrian#include "pipe/p_shader_tokens.h" 41344356a0edee932604027386591c82f6666e607cBrian 42344356a0edee932604027386591c82f6666e607cBrian#include "sp_context.h" 43344356a0edee932604027386591c82f6666e607cBrian#include "sp_state.h" 44344356a0edee932604027386591c82f6666e607cBrian#include "sp_headers.h" 45344356a0edee932604027386591c82f6666e607cBrian#include "sp_quad.h" 46344356a0edee932604027386591c82f6666e607cBrian#include "sp_texture.h" 47344356a0edee932604027386591c82f6666e607cBrian#include "sp_tex_sample.h" 48344356a0edee932604027386591c82f6666e607cBrian 49344356a0edee932604027386591c82f6666e607cBrian 50344356a0edee932604027386591c82f6666e607cBrianstruct quad_shade_stage 51344356a0edee932604027386591c82f6666e607cBrian{ 52344356a0edee932604027386591c82f6666e607cBrian struct quad_stage stage; 53344356a0edee932604027386591c82f6666e607cBrian struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; 54344356a0edee932604027386591c82f6666e607cBrian struct tgsi_exec_machine machine; 55344356a0edee932604027386591c82f6666e607cBrian struct tgsi_exec_vector *inputs, *outputs; 56344356a0edee932604027386591c82f6666e607cBrian int colorOutSlot, depthOutSlot; 57344356a0edee932604027386591c82f6666e607cBrian}; 58344356a0edee932604027386591c82f6666e607cBrian 59344356a0edee932604027386591c82f6666e607cBrian 60344356a0edee932604027386591c82f6666e607cBrian/** cast wrapper */ 61344356a0edee932604027386591c82f6666e607cBrianstatic INLINE struct quad_shade_stage * 62344356a0edee932604027386591c82f6666e607cBrianquad_shade_stage(struct quad_stage *qs) 63344356a0edee932604027386591c82f6666e607cBrian{ 64344356a0edee932604027386591c82f6666e607cBrian return (struct quad_shade_stage *) qs; 
65344356a0edee932604027386591c82f6666e607cBrian} 66344356a0edee932604027386591c82f6666e607cBrian 67344356a0edee932604027386591c82f6666e607cBrian 68344356a0edee932604027386591c82f6666e607cBrian 69344356a0edee932604027386591c82f6666e607cBrian/** 70344356a0edee932604027386591c82f6666e607cBrian * Execute fragment shader for the four fragments in the quad. 71344356a0edee932604027386591c82f6666e607cBrian */ 72344356a0edee932604027386591c82f6666e607cBrianstatic void 73344356a0edee932604027386591c82f6666e607cBrianshade_quad( 74344356a0edee932604027386591c82f6666e607cBrian struct quad_stage *qs, 75344356a0edee932604027386591c82f6666e607cBrian struct quad_header *quad ) 76344356a0edee932604027386591c82f6666e607cBrian{ 77344356a0edee932604027386591c82f6666e607cBrian struct quad_shade_stage *qss = quad_shade_stage( qs ); 78344356a0edee932604027386591c82f6666e607cBrian struct softpipe_context *softpipe = qs->softpipe; 79344356a0edee932604027386591c82f6666e607cBrian struct tgsi_exec_machine *machine = &qss->machine; 80344356a0edee932604027386591c82f6666e607cBrian 81344356a0edee932604027386591c82f6666e607cBrian /* Consts do not require 16 byte alignment. 
*/ 82344356a0edee932604027386591c82f6666e607cBrian machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; 83344356a0edee932604027386591c82f6666e607cBrian 84344356a0edee932604027386591c82f6666e607cBrian machine->InterpCoefs = quad->coef; 85344356a0edee932604027386591c82f6666e607cBrian 86344356a0edee932604027386591c82f6666e607cBrian /* run shader */ 87344356a0edee932604027386591c82f6666e607cBrian quad->mask &= softpipe->fs->run( softpipe->fs, 88344356a0edee932604027386591c82f6666e607cBrian &qss->machine, 89344356a0edee932604027386591c82f6666e607cBrian quad ); 90344356a0edee932604027386591c82f6666e607cBrian 91c7daa68ca312cc98abe351be2fef8d8246929627Brian#if 0 /* XXX multi color outputs - untested */ 92c7daa68ca312cc98abe351be2fef8d8246929627Brian /* store outputs */ 93c7daa68ca312cc98abe351be2fef8d8246929627Brian boolean z_written = FALSE; 94c7daa68ca312cc98abe351be2fef8d8246929627Brian { 95c7daa68ca312cc98abe351be2fef8d8246929627Brian const ubyte *sem_name = softpipe->fs->info.output_semantic_name; 96c7daa68ca312cc98abe351be2fef8d8246929627Brian const ubyte *sem_index = softpipe->fs->info.output_semantic_index; 97c7daa68ca312cc98abe351be2fef8d8246929627Brian const uint n = qss->stage.softpipe->fs->info.num_outputs; 98c7daa68ca312cc98abe351be2fef8d8246929627Brian uint i; 99c7daa68ca312cc98abe351be2fef8d8246929627Brian for (i = 0; i < n; i++) { 100c7daa68ca312cc98abe351be2fef8d8246929627Brian switch (sem_name[i]) { 101c7daa68ca312cc98abe351be2fef8d8246929627Brian case TGSI_SEMANTIC_COLOR: 102c7daa68ca312cc98abe351be2fef8d8246929627Brian { 103c7daa68ca312cc98abe351be2fef8d8246929627Brian uint cbuf = sem_index[i]; 104c7daa68ca312cc98abe351be2fef8d8246929627Brian memcpy(quad->outputs.color[cbuf], 105c7daa68ca312cc98abe351be2fef8d8246929627Brian &machine->Outputs[i].xyzw[0].f[0], 106c7daa68ca312cc98abe351be2fef8d8246929627Brian sizeof(quad->outputs.color[0]) ); 107c7daa68ca312cc98abe351be2fef8d8246929627Brian } 108c7daa68ca312cc98abe351be2fef8d8246929627Brian 
break; 109c7daa68ca312cc98abe351be2fef8d8246929627Brian case TGSI_SEMANTIC_POSITION: 110c7daa68ca312cc98abe351be2fef8d8246929627Brian { 111c7daa68ca312cc98abe351be2fef8d8246929627Brian uint j; 112c7daa68ca312cc98abe351be2fef8d8246929627Brian for (j = 0; j < 4; j++) { 113c7daa68ca312cc98abe351be2fef8d8246929627Brian quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j]; 114c7daa68ca312cc98abe351be2fef8d8246929627Brian } 115c7daa68ca312cc98abe351be2fef8d8246929627Brian z_written = TRUE; 116c7daa68ca312cc98abe351be2fef8d8246929627Brian } 117c7daa68ca312cc98abe351be2fef8d8246929627Brian break; 118c7daa68ca312cc98abe351be2fef8d8246929627Brian } 119c7daa68ca312cc98abe351be2fef8d8246929627Brian } 120c7daa68ca312cc98abe351be2fef8d8246929627Brian } 121c7daa68ca312cc98abe351be2fef8d8246929627Brian 122c7daa68ca312cc98abe351be2fef8d8246929627Brian if (!z_written) { 123c7daa68ca312cc98abe351be2fef8d8246929627Brian /* compute Z values now, as in the quad earlyz stage */ 124c7daa68ca312cc98abe351be2fef8d8246929627Brian /* XXX we should really only do this if the earlyz stage is not used */ 125c7daa68ca312cc98abe351be2fef8d8246929627Brian const float fx = (float) quad->x0; 126c7daa68ca312cc98abe351be2fef8d8246929627Brian const float fy = (float) quad->y0; 127c7daa68ca312cc98abe351be2fef8d8246929627Brian const float dzdx = quad->posCoef->dadx[2]; 128c7daa68ca312cc98abe351be2fef8d8246929627Brian const float dzdy = quad->posCoef->dady[2]; 129c7daa68ca312cc98abe351be2fef8d8246929627Brian const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; 130c7daa68ca312cc98abe351be2fef8d8246929627Brian 131c7daa68ca312cc98abe351be2fef8d8246929627Brian quad->outputs.depth[0] = z0; 132c7daa68ca312cc98abe351be2fef8d8246929627Brian quad->outputs.depth[1] = z0 + dzdx; 133c7daa68ca312cc98abe351be2fef8d8246929627Brian quad->outputs.depth[2] = z0 + dzdy; 134c7daa68ca312cc98abe351be2fef8d8246929627Brian quad->outputs.depth[3] = z0 + dzdx + dzdy; 135c7daa68ca312cc98abe351be2fef8d8246929627Brian } 
136c7daa68ca312cc98abe351be2fef8d8246929627Brian#endif 137c7daa68ca312cc98abe351be2fef8d8246929627Brian 138c7daa68ca312cc98abe351be2fef8d8246929627Brian /* store result color(s) */ 139344356a0edee932604027386591c82f6666e607cBrian if (qss->colorOutSlot >= 0) { 140344356a0edee932604027386591c82f6666e607cBrian /* XXX need to handle multiple color outputs someday */ 141c7daa68ca312cc98abe351be2fef8d8246929627Brian assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot] 142344356a0edee932604027386591c82f6666e607cBrian == TGSI_SEMANTIC_COLOR); 143344356a0edee932604027386591c82f6666e607cBrian memcpy( 144c7daa68ca312cc98abe351be2fef8d8246929627Brian quad->outputs.color[0], 145344356a0edee932604027386591c82f6666e607cBrian &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], 146c7daa68ca312cc98abe351be2fef8d8246929627Brian sizeof( quad->outputs.color[0] ) ); 147344356a0edee932604027386591c82f6666e607cBrian } 148344356a0edee932604027386591c82f6666e607cBrian 149344356a0edee932604027386591c82f6666e607cBrian /* store result Z */ 150344356a0edee932604027386591c82f6666e607cBrian if (qss->depthOutSlot >= 0) { 151344356a0edee932604027386591c82f6666e607cBrian /* output[slot] is new Z */ 152344356a0edee932604027386591c82f6666e607cBrian uint i; 153344356a0edee932604027386591c82f6666e607cBrian for (i = 0; i < 4; i++) { 154344356a0edee932604027386591c82f6666e607cBrian quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; 155344356a0edee932604027386591c82f6666e607cBrian } 156344356a0edee932604027386591c82f6666e607cBrian } 157344356a0edee932604027386591c82f6666e607cBrian else { 158cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian /* compute Z values now, as in the quad earlyz stage */ 159cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian /* XXX we should really only do this if the earlyz stage is not used */ 160cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian const float fx = (float) quad->x0; 161cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian const float fy = (float) quad->y0; 
162cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian const float dzdx = quad->posCoef->dadx[2]; 163cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian const float dzdy = quad->posCoef->dady[2]; 164cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; 165cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian 166cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian quad->outputs.depth[0] = z0; 167cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian quad->outputs.depth[1] = z0 + dzdx; 168cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian quad->outputs.depth[2] = z0 + dzdy; 169cbfe6ee5d58e7342012392a8ead7ae373625c00aBrian quad->outputs.depth[3] = z0 + dzdx + dzdy; 170344356a0edee932604027386591c82f6666e607cBrian } 171344356a0edee932604027386591c82f6666e607cBrian 172344356a0edee932604027386591c82f6666e607cBrian /* shader may cull fragments */ 173344356a0edee932604027386591c82f6666e607cBrian if( quad->mask ) { 174344356a0edee932604027386591c82f6666e607cBrian qs->next->run( qs->next, quad ); 175344356a0edee932604027386591c82f6666e607cBrian } 176344356a0edee932604027386591c82f6666e607cBrian} 177344356a0edee932604027386591c82f6666e607cBrian 178344356a0edee932604027386591c82f6666e607cBrian/** 179344356a0edee932604027386591c82f6666e607cBrian * Per-primitive (or per-begin?) 
setup 180344356a0edee932604027386591c82f6666e607cBrian */ 181344356a0edee932604027386591c82f6666e607cBrianstatic void shade_begin(struct quad_stage *qs) 182344356a0edee932604027386591c82f6666e607cBrian{ 183344356a0edee932604027386591c82f6666e607cBrian struct quad_shade_stage *qss = quad_shade_stage(qs); 184344356a0edee932604027386591c82f6666e607cBrian struct softpipe_context *softpipe = qs->softpipe; 185344356a0edee932604027386591c82f6666e607cBrian unsigned i; 186344356a0edee932604027386591c82f6666e607cBrian unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); 187344356a0edee932604027386591c82f6666e607cBrian 188344356a0edee932604027386591c82f6666e607cBrian /* set TGSI sampler state that varies */ 189344356a0edee932604027386591c82f6666e607cBrian for (i = 0; i < num; i++) { 190344356a0edee932604027386591c82f6666e607cBrian qss->samplers[i].state = softpipe->sampler[i]; 191344356a0edee932604027386591c82f6666e607cBrian qss->samplers[i].texture = softpipe->texture[i]; 192344356a0edee932604027386591c82f6666e607cBrian } 193344356a0edee932604027386591c82f6666e607cBrian 194344356a0edee932604027386591c82f6666e607cBrian /* find output slots for depth, color */ 195344356a0edee932604027386591c82f6666e607cBrian qss->colorOutSlot = -1; 196344356a0edee932604027386591c82f6666e607cBrian qss->depthOutSlot = -1; 197344356a0edee932604027386591c82f6666e607cBrian for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { 198344356a0edee932604027386591c82f6666e607cBrian switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { 199344356a0edee932604027386591c82f6666e607cBrian case TGSI_SEMANTIC_POSITION: 200344356a0edee932604027386591c82f6666e607cBrian qss->depthOutSlot = i; 201344356a0edee932604027386591c82f6666e607cBrian break; 202344356a0edee932604027386591c82f6666e607cBrian case TGSI_SEMANTIC_COLOR: 203344356a0edee932604027386591c82f6666e607cBrian qss->colorOutSlot = i; 204344356a0edee932604027386591c82f6666e607cBrian break; 
205344356a0edee932604027386591c82f6666e607cBrian } 206344356a0edee932604027386591c82f6666e607cBrian } 207344356a0edee932604027386591c82f6666e607cBrian 208344356a0edee932604027386591c82f6666e607cBrian softpipe->fs->prepare( softpipe->fs, 209344356a0edee932604027386591c82f6666e607cBrian &qss->machine, 210344356a0edee932604027386591c82f6666e607cBrian qss->samplers ); 211344356a0edee932604027386591c82f6666e607cBrian 212344356a0edee932604027386591c82f6666e607cBrian qs->next->begin(qs->next); 213344356a0edee932604027386591c82f6666e607cBrian} 214344356a0edee932604027386591c82f6666e607cBrian 215344356a0edee932604027386591c82f6666e607cBrian 216344356a0edee932604027386591c82f6666e607cBrianstatic void shade_destroy(struct quad_stage *qs) 217344356a0edee932604027386591c82f6666e607cBrian{ 218344356a0edee932604027386591c82f6666e607cBrian struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; 219344356a0edee932604027386591c82f6666e607cBrian 220344356a0edee932604027386591c82f6666e607cBrian tgsi_exec_machine_free_data(&qss->machine); 221344356a0edee932604027386591c82f6666e607cBrian FREE( qss->inputs ); 222344356a0edee932604027386591c82f6666e607cBrian FREE( qss->outputs ); 223344356a0edee932604027386591c82f6666e607cBrian FREE( qs ); 224344356a0edee932604027386591c82f6666e607cBrian} 225344356a0edee932604027386591c82f6666e607cBrian 226344356a0edee932604027386591c82f6666e607cBrian 227344356a0edee932604027386591c82f6666e607cBrianstruct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) 228344356a0edee932604027386591c82f6666e607cBrian{ 229344356a0edee932604027386591c82f6666e607cBrian struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); 230344356a0edee932604027386591c82f6666e607cBrian uint i; 231344356a0edee932604027386591c82f6666e607cBrian 232344356a0edee932604027386591c82f6666e607cBrian /* allocate storage for program inputs/outputs, aligned to 16 bytes */ 23339038c11699bbc9baab744542e96d54e91cb452aBrian qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * 
sizeof(*qss->inputs) + 16); 23439038c11699bbc9baab744542e96d54e91cb452aBrian qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16); 235344356a0edee932604027386591c82f6666e607cBrian qss->machine.Inputs = align16(qss->inputs); 236344356a0edee932604027386591c82f6666e607cBrian qss->machine.Outputs = align16(qss->outputs); 237344356a0edee932604027386591c82f6666e607cBrian 238344356a0edee932604027386591c82f6666e607cBrian qss->stage.softpipe = softpipe; 239344356a0edee932604027386591c82f6666e607cBrian qss->stage.begin = shade_begin; 240344356a0edee932604027386591c82f6666e607cBrian qss->stage.run = shade_quad; 241344356a0edee932604027386591c82f6666e607cBrian qss->stage.destroy = shade_destroy; 242344356a0edee932604027386591c82f6666e607cBrian 243344356a0edee932604027386591c82f6666e607cBrian /* set TGSI sampler state that's constant */ 244344356a0edee932604027386591c82f6666e607cBrian for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { 245344356a0edee932604027386591c82f6666e607cBrian assert(softpipe->tex_cache[i]); 246344356a0edee932604027386591c82f6666e607cBrian qss->samplers[i].get_samples = sp_get_samples; 247344356a0edee932604027386591c82f6666e607cBrian qss->samplers[i].pipe = &softpipe->pipe; 248344356a0edee932604027386591c82f6666e607cBrian qss->samplers[i].cache = softpipe->tex_cache[i]; 249344356a0edee932604027386591c82f6666e607cBrian } 250344356a0edee932604027386591c82f6666e607cBrian 251344356a0edee932604027386591c82f6666e607cBrian tgsi_exec_machine_init( &qss->machine ); 252344356a0edee932604027386591c82f6666e607cBrian 253344356a0edee932604027386591c82f6666e607cBrian return &qss->stage; 254344356a0edee932604027386591c82f6666e607cBrian} 255