t_vb_program.c revision 9520f483b8f1e45fa474674b415554988de5d8d3
1/*
2 * Mesa 3-D graphics library
3 * Version:  7.6
4 *
5 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6 * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
27/**
28 * \file tnl/t_vb_program.c
29 * \brief Pipeline stage for executing vertex programs.
30 * \author Brian Paul,  Keith Whitwell
31 */
32
33
34#include "main/glheader.h"
35#include "main/colormac.h"
36#include "main/macros.h"
37#include "main/imports.h"
38#include "math/m_xform.h"
39#include "program/prog_instruction.h"
40#include "program/prog_statevars.h"
41#include "program/prog_execute.h"
42#include "swrast/s_context.h"
43
44#include "tnl/tnl.h"
45#include "tnl/t_context.h"
46#include "tnl/t_pipeline.h"
47
48
#ifdef NAN_CHECK
/**
 * Debug-only sanity check for one float value (only compiled when
 * NAN_CHECK is defined).
 *
 * Asserts that \p x is neither infinite nor NaN and that its magnitude
 * does not exceed 1.0e15.  Zero and negative values are valid vertex
 * data and are accepted.
 */
static inline void
check_float(float x)
{
   assert(!IS_INF_OR_NAN(x));
   /* Bound the value on both sides; a one-sided positive lower bound
    * would reject zero and every negative number.
    */
   assert(-1.0e15 <= x && x <= 1.0e15);
}
#endif
58
59
60/*!
61 * Private storage for the vertex program pipeline stage.
62 */
63struct vp_stage_data {
64   /** The results of running the vertex program go into these arrays. */
65   GLvector4f results[VERT_RESULT_MAX];
66
67   GLvector4f ndcCoords;              /**< normalized device coords */
68   GLubyte *clipmask;                 /**< clip flags */
69   GLubyte ormask, andmask;           /**< for clipping */
70
71   struct gl_program_machine machine;
72};
73
74
/**
 * Get the vertex program stage's private data from a pipeline stage
 * pointer.  The argument is parenthesized so that any pointer-valued
 * expression may be passed.
 */
#define VP_STAGE_DATA(stage) ((struct vp_stage_data *)((stage)->privatePtr))
76
77
78static void
79userclip( struct gl_context *ctx,
80          GLvector4f *clip,
81          GLubyte *clipmask,
82          GLubyte *clipormask,
83          GLubyte *clipandmask )
84{
85   GLuint p;
86
87   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
88      if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
89	 GLuint nr, i;
90	 const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
91	 const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
92	 const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
93	 const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
94         GLfloat *coord = (GLfloat *)clip->data;
95         GLuint stride = clip->stride;
96         GLuint count = clip->count;
97
98	 for (nr = 0, i = 0 ; i < count ; i++) {
99	    GLfloat dp = (coord[0] * a +
100			  coord[1] * b +
101			  coord[2] * c +
102			  coord[3] * d);
103
104	    if (dp < 0) {
105	       nr++;
106	       clipmask[i] |= CLIP_USER_BIT;
107	    }
108
109	    STRIDE_F(coord, stride);
110	 }
111
112	 if (nr > 0) {
113	    *clipormask |= CLIP_USER_BIT;
114	    if (nr == count) {
115	       *clipandmask |= CLIP_USER_BIT;
116	       return;
117	    }
118	 }
119      }
120   }
121}
122
123
124static GLboolean
125do_ndc_cliptest(struct gl_context *ctx, struct vp_stage_data *store)
126{
127   TNLcontext *tnl = TNL_CONTEXT(ctx);
128   struct vertex_buffer *VB = &tnl->vb;
129   /* Cliptest and perspective divide.  Clip functions must clear
130    * the clipmask.
131    */
132   store->ormask = 0;
133   store->andmask = CLIP_FRUSTUM_BITS;
134
135   tnl_clip_prepare(ctx);
136
137   if (tnl->NeedNdcCoords) {
138      VB->NdcPtr =
139         _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
140                                            &store->ndcCoords,
141                                            store->clipmask,
142                                            &store->ormask,
143                                            &store->andmask,
144					    !ctx->Transform.DepthClamp );
145   }
146   else {
147      VB->NdcPtr = NULL;
148      _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
149                                            NULL,
150                                            store->clipmask,
151                                            &store->ormask,
152                                            &store->andmask,
153					    !ctx->Transform.DepthClamp );
154   }
155
156   if (store->andmask) {
157      /* All vertices are outside the frustum */
158      return GL_FALSE;
159   }
160
161   /* Test userclip planes.  This contributes to VB->ClipMask.
162    */
163   /** XXX NEW_SLANG _Enabled ??? */
164   if (ctx->Transform.ClipPlanesEnabled && (!ctx->VertexProgram._Enabled ||
165      ctx->VertexProgram.Current->IsPositionInvariant)) {
166      userclip( ctx,
167		VB->ClipPtr,
168		store->clipmask,
169		&store->ormask,
170		&store->andmask );
171
172      if (store->andmask) {
173	 return GL_FALSE;
174      }
175   }
176
177   VB->ClipAndMask = store->andmask;
178   VB->ClipOrMask = store->ormask;
179   VB->ClipMask = store->clipmask;
180
181   return GL_TRUE;
182}
183
184
185/**
186 * XXX the texture sampling code in this module is a bit of a hack.
187 * The texture sampling code is in swrast, though it doesn't have any
188 * real dependencies on the rest of swrast.  It should probably be
189 * moved into main/ someday.
190 */
191static void
192vp_fetch_texel(struct gl_context *ctx, const GLfloat texcoord[4], GLfloat lambda,
193               GLuint unit, GLfloat color[4])
194{
195   SWcontext *swrast = SWRAST_CONTEXT(ctx);
196
197   /* XXX use a float-valued TextureSample routine here!!! */
198   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
199                               1, (const GLfloat (*)[4]) texcoord,
200                               &lambda,  (GLfloat (*)[4]) color);
201}
202
203
204/**
205 * Called via ctx->Driver.ProgramStringNotify() after a new vertex program
206 * string has been parsed.
207 */
208GLboolean
209_tnl_program_string(struct gl_context *ctx, GLenum target, struct gl_program *program)
210{
211   /* No-op.
212    * If we had derived anything from the program that was private to this
213    * stage we'd recompute/validate it here.
214    */
215   return GL_TRUE;
216}
217
218
219/**
220 * Initialize virtual machine state prior to executing vertex program.
221 */
222static void
223init_machine(struct gl_context *ctx, struct gl_program_machine *machine,
224             GLuint instID)
225{
226   /* Input registers get initialized from the current vertex attribs */
227   memcpy(machine->VertAttribs, ctx->Current.Attrib,
228          MAX_VERTEX_GENERIC_ATTRIBS * 4 * sizeof(GLfloat));
229
230   if (ctx->VertexProgram._Current->IsNVProgram) {
231      GLuint i;
232      /* Output/result regs are initialized to [0,0,0,1] */
233      for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
234         ASSIGN_4V(machine->Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
235      }
236      /* Temp regs are initialized to [0,0,0,0] */
237      for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
238         ASSIGN_4V(machine->Temporaries[i], 0.0F, 0.0F, 0.0F, 0.0F);
239      }
240      for (i = 0; i < MAX_VERTEX_PROGRAM_ADDRESS_REGS; i++) {
241         ASSIGN_4V(machine->AddressReg[i], 0, 0, 0, 0);
242      }
243   }
244
245   machine->NumDeriv = 0;
246
247   /* init condition codes */
248   machine->CondCodes[0] = COND_EQ;
249   machine->CondCodes[1] = COND_EQ;
250   machine->CondCodes[2] = COND_EQ;
251   machine->CondCodes[3] = COND_EQ;
252
253   /* init call stack */
254   machine->StackDepth = 0;
255
256   machine->FetchTexelLod = vp_fetch_texel;
257   machine->FetchTexelDeriv = NULL; /* not used by vertex programs */
258
259   machine->Samplers = ctx->VertexProgram._Current->Base.SamplerUnits;
260
261   machine->SystemValues[SYSTEM_VALUE_INSTANCE_ID][0] = (GLfloat) instID;
262}
263
264
265/**
266 * Map the texture images which the vertex program will access (if any).
267 */
268static void
269map_textures(struct gl_context *ctx, const struct gl_vertex_program *vp)
270{
271   GLuint u;
272
273   if (!ctx->Driver.MapTexture)
274      return;
275
276   for (u = 0; u < ctx->Const.MaxVertexTextureImageUnits; u++) {
277      if (vp->Base.TexturesUsed[u]) {
278         /* Note: _Current *should* correspond to the target indicated
279          * in TexturesUsed[u].
280          */
281         ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[u]._Current);
282      }
283   }
284}
285
286
287/**
288 * Unmap the texture images which were used by the vertex program (if any).
289 */
290static void
291unmap_textures(struct gl_context *ctx, const struct gl_vertex_program *vp)
292{
293   GLuint u;
294
295   if (!ctx->Driver.MapTexture)
296      return;
297
298   for (u = 0; u < ctx->Const.MaxVertexTextureImageUnits; u++) {
299      if (vp->Base.TexturesUsed[u]) {
300         /* Note: _Current *should* correspond to the target indicated
301          * in TexturesUsed[u].
302          */
303         ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[u]._Current);
304      }
305   }
306}
307
308
309/**
310 * This function executes vertex programs
311 */
312static GLboolean
313run_vp( struct gl_context *ctx, struct tnl_pipeline_stage *stage )
314{
315   TNLcontext *tnl = TNL_CONTEXT(ctx);
316   struct vp_stage_data *store = VP_STAGE_DATA(stage);
317   struct vertex_buffer *VB = &tnl->vb;
318   struct gl_vertex_program *program = ctx->VertexProgram._Current;
319   struct gl_program_machine *machine = &store->machine;
320   GLuint outputs[VERT_RESULT_MAX], numOutputs;
321   GLuint i, j;
322
323   if (!program)
324      return GL_TRUE;
325
326   if (program->IsNVProgram) {
327      _mesa_load_tracked_matrices(ctx);
328   }
329   else {
330      /* ARB program or vertex shader */
331      _mesa_load_state_parameters(ctx, program->Base.Parameters);
332   }
333
334   /* make list of outputs to save some time below */
335   numOutputs = 0;
336   for (i = 0; i < VERT_RESULT_MAX; i++) {
337      if (program->Base.OutputsWritten & BITFIELD64_BIT(i)) {
338         outputs[numOutputs++] = i;
339      }
340   }
341
342   map_textures(ctx, program);
343
344   for (i = 0; i < VB->Count; i++) {
345      GLuint attr;
346
347      init_machine(ctx, machine, tnl->CurInstance);
348
349#if 0
350      printf("Input  %d: %f, %f, %f, %f\n", i,
351             VB->AttribPtr[0]->data[i][0],
352             VB->AttribPtr[0]->data[i][1],
353             VB->AttribPtr[0]->data[i][2],
354             VB->AttribPtr[0]->data[i][3]);
355      printf("   color: %f, %f, %f, %f\n",
356             VB->AttribPtr[3]->data[i][0],
357             VB->AttribPtr[3]->data[i][1],
358             VB->AttribPtr[3]->data[i][2],
359             VB->AttribPtr[3]->data[i][3]);
360      printf("  normal: %f, %f, %f, %f\n",
361             VB->AttribPtr[2]->data[i][0],
362             VB->AttribPtr[2]->data[i][1],
363             VB->AttribPtr[2]->data[i][2],
364             VB->AttribPtr[2]->data[i][3]);
365#endif
366
367      /* the vertex array case */
368      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
369	 if (program->Base.InputsRead & (1 << attr)) {
370	    const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
371	    const GLuint size = VB->AttribPtr[attr]->size;
372	    const GLuint stride = VB->AttribPtr[attr]->stride;
373	    const GLfloat *data = (GLfloat *) (ptr + stride * i);
374#ifdef NAN_CHECK
375            check_float(data[0]);
376            check_float(data[1]);
377            check_float(data[2]);
378            check_float(data[3]);
379#endif
380	    COPY_CLEAN_4V(machine->VertAttribs[attr], size, data);
381	 }
382      }
383
384      /* execute the program */
385      _mesa_execute_program(ctx, &program->Base, machine);
386
387      /* copy the output registers into the VB->attribs arrays */
388      for (j = 0; j < numOutputs; j++) {
389         const GLuint attr = outputs[j];
390#ifdef NAN_CHECK
391         check_float(machine->Outputs[attr][0]);
392         check_float(machine->Outputs[attr][1]);
393         check_float(machine->Outputs[attr][2]);
394         check_float(machine->Outputs[attr][3]);
395#endif
396         COPY_4V(store->results[attr].data[i], machine->Outputs[attr]);
397      }
398
399      /* FOGC is a special case.  Fragment shader expects (f,0,0,1) */
400      if (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_FOGC)) {
401         store->results[VERT_RESULT_FOGC].data[i][1] = 0.0;
402         store->results[VERT_RESULT_FOGC].data[i][2] = 0.0;
403         store->results[VERT_RESULT_FOGC].data[i][3] = 1.0;
404      }
405#ifdef NAN_CHECK
406      ASSERT(machine->Outputs[0][3] != 0.0F);
407#endif
408#if 0
409      printf("HPOS: %f %f %f %f\n",
410             machine->Outputs[0][0],
411             machine->Outputs[0][1],
412             machine->Outputs[0][2],
413             machine->Outputs[0][3]);
414#endif
415   }
416
417   unmap_textures(ctx, program);
418
419   /* Fixup fog and point size results if needed */
420   if (program->IsNVProgram) {
421      if (ctx->Fog.Enabled &&
422          (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_FOGC)) == 0) {
423         for (i = 0; i < VB->Count; i++) {
424            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
425         }
426      }
427
428      if (ctx->VertexProgram.PointSizeEnabled &&
429          (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_PSIZ)) == 0) {
430         for (i = 0; i < VB->Count; i++) {
431            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
432         }
433      }
434   }
435
436   if (program->IsPositionInvariant) {
437      /* We need the exact same transform as in the fixed function path here
438       * to guarantee invariance, depending on compiler optimization flags
439       * results could be different otherwise.
440       */
441      VB->ClipPtr = TransformRaw( &store->results[0],
442				  &ctx->_ModelProjectMatrix,
443				  VB->AttribPtr[0] );
444
445      /* Drivers expect this to be clean to element 4...
446       */
447      switch (VB->ClipPtr->size) {
448      case 1:
449	 /* impossible */
450      case 2:
451	 _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 );
452	 /* fall-through */
453      case 3:
454	 _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 );
455	 /* fall-through */
456      case 4:
457	 break;
458      }
459   }
460   else {
461      /* Setup the VB pointers so that the next pipeline stages get
462       * their data from the right place (the program output arrays).
463       */
464      VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
465      VB->ClipPtr->size = 4;
466      VB->ClipPtr->count = VB->Count;
467   }
468
469   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
470   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
471   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
472   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];
473   VB->BackfaceColorPtr = &store->results[VERT_RESULT_BFC0];
474   VB->BackfaceSecondaryColorPtr = &store->results[VERT_RESULT_BFC1];
475
476   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
477      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
478         = &store->results[VERT_RESULT_TEX0 + i];
479   }
480
481   for (i = 0; i < ctx->Const.MaxVarying; i++) {
482      if (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_VAR0 + i)) {
483         /* Note: varying results get put into the generic attributes */
484	 VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
485            = &store->results[VERT_RESULT_VAR0 + i];
486      }
487   }
488
489
490   /* Perform NDC and cliptest operations:
491    */
492   return do_ndc_cliptest(ctx, store);
493}
494
495
496/**
497 * Called the first time stage->run is called.  In effect, don't
498 * allocate data until the first time the stage is run.
499 */
500static GLboolean
501init_vp(struct gl_context *ctx, struct tnl_pipeline_stage *stage)
502{
503   TNLcontext *tnl = TNL_CONTEXT(ctx);
504   struct vertex_buffer *VB = &(tnl->vb);
505   struct vp_stage_data *store;
506   const GLuint size = VB->Size;
507   GLuint i;
508
509   stage->privatePtr = CALLOC(sizeof(*store));
510   store = VP_STAGE_DATA(stage);
511   if (!store)
512      return GL_FALSE;
513
514   /* Allocate arrays of vertex output values */
515   for (i = 0; i < VERT_RESULT_MAX; i++) {
516      _mesa_vector4f_alloc( &store->results[i], 0, size, 32 );
517      store->results[i].size = 4;
518   }
519
520   /* a few other misc allocations */
521   _mesa_vector4f_alloc( &store->ndcCoords, 0, size, 32 );
522   store->clipmask = (GLubyte *) _mesa_align_malloc(sizeof(GLubyte)*size, 32 );
523
524   return GL_TRUE;
525}
526
527
528/**
529 * Destructor for this pipeline stage.
530 */
531static void
532dtr(struct tnl_pipeline_stage *stage)
533{
534   struct vp_stage_data *store = VP_STAGE_DATA(stage);
535
536   if (store) {
537      GLuint i;
538
539      /* free the vertex program result arrays */
540      for (i = 0; i < VERT_RESULT_MAX; i++)
541         _mesa_vector4f_free( &store->results[i] );
542
543      /* free misc arrays */
544      _mesa_vector4f_free( &store->ndcCoords );
545      _mesa_align_free( store->clipmask );
546
547      FREE( store );
548      stage->privatePtr = NULL;
549   }
550}
551
552
553static void
554validate_vp_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage)
555{
556   if (ctx->VertexProgram._Current) {
557      _swrast_update_texture_samplers(ctx);
558   }
559}
560
561
562
563/**
564 * Public description of this pipeline stage.
565 */
566const struct tnl_pipeline_stage _tnl_vertex_program_stage =
567{
568   "vertex-program",
569   NULL,			/* private_data */
570   init_vp,			/* create */
571   dtr,				/* destroy */
572   validate_vp_stage, 		/* validate */
573   run_vp			/* run -- initially set to ctr */
574};
575