swr_shader.cpp revision efdaf5fa3e74ca4f3d9217dc6955aef6dc698a68
1/****************************************************************************
2 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24// llvm redefines DEBUG
25#pragma push_macro("DEBUG")
26#undef DEBUG
27#include "JitManager.h"
28#include "llvm-c/Core.h"
29#include "llvm/Support/CBindingWrapping.h"
30#pragma pop_macro("DEBUG")
31
32#include "state.h"
33#include "state_llvm.h"
34#include "builder.h"
35
36#include "tgsi/tgsi_strings.h"
37#include "gallivm/lp_bld_init.h"
38#include "gallivm/lp_bld_flow.h"
39#include "gallivm/lp_bld_struct.h"
40#include "gallivm/lp_bld_tgsi.h"
41
42#include "swr_context.h"
43#include "swr_context_llvm.h"
44#include "swr_state.h"
45#include "swr_screen.h"
46
// Forward declaration: locate_linkage() is defined further down in this file
// but is already called from BuilderSWR::CompileVS.
static unsigned
locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
49
50bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
51{
52   return !memcmp(&lhs, &rhs, sizeof(lhs));
53}
54
55bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
56{
57   return !memcmp(&lhs, &rhs, sizeof(lhs));
58}
59
60static void
61swr_generate_sampler_key(const struct lp_tgsi_info &info,
62                         struct swr_context *ctx,
63                         unsigned shader_type,
64                         struct swr_jit_sampler_key &key)
65{
66   key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
67
68   for (unsigned i = 0; i < key.nr_samplers; i++) {
69      if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
70         lp_sampler_static_sampler_state(
71            &key.sampler[i].sampler_state,
72            ctx->samplers[shader_type][i]);
73      }
74   }
75
76   /*
77    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
78    * are dx10-style? Can't really have mixed opcodes, at least not
79    * if we want to skip the holes here (without rescanning tgsi).
80    */
81   if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
82      key.nr_sampler_views =
83         info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
84      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
85         if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
86            lp_sampler_static_texture_state(
87               &key.sampler[i].texture_state,
88               ctx->sampler_views[shader_type][i]);
89         }
90      }
91   } else {
92      key.nr_sampler_views = key.nr_samplers;
93      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
94         if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
95            lp_sampler_static_texture_state(
96               &key.sampler[i].texture_state,
97               ctx->sampler_views[shader_type][i]);
98         }
99      }
100   }
101}
102
103void
104swr_generate_fs_key(struct swr_jit_fs_key &key,
105                    struct swr_context *ctx,
106                    swr_fragment_shader *swr_fs)
107{
108   memset(&key, 0, sizeof(key));
109
110   key.nr_cbufs = ctx->framebuffer.nr_cbufs;
111   key.light_twoside = ctx->rasterizer->light_twoside;
112   memcpy(&key.vs_output_semantic_name,
113          &ctx->vs->info.base.output_semantic_name,
114          sizeof(key.vs_output_semantic_name));
115   memcpy(&key.vs_output_semantic_idx,
116          &ctx->vs->info.base.output_semantic_index,
117          sizeof(key.vs_output_semantic_idx));
118
119   swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
120}
121
122void
123swr_generate_vs_key(struct swr_jit_vs_key &key,
124                    struct swr_context *ctx,
125                    swr_vertex_shader *swr_vs)
126{
127   memset(&key, 0, sizeof(key));
128
129   key.clip_plane_mask =
130      swr_vs->info.base.clipdist_writemask ?
131      swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
132      ctx->rasterizer->clip_plane_enable;
133
134   swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
135}
136
137struct BuilderSWR : public Builder {
138   BuilderSWR(JitManager *pJitMgr, const char *pName)
139      : Builder(pJitMgr)
140   {
141      pJitMgr->SetupNewModule();
142      gallivm = gallivm_create(pName, wrap(&JM()->mContext));
143      pJitMgr->mpCurrentModule = unwrap(gallivm->module);
144   }
145
146   ~BuilderSWR() {
147      gallivm_free_ir(gallivm);
148   }
149
150   struct gallivm_state *gallivm;
151   PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
152   PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
153};
154
155PFN_VERTEX_FUNC
156BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
157{
158   struct swr_vertex_shader *swr_vs = ctx->vs;
159
160   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
161   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
162
163   memset(outputs, 0, sizeof(outputs));
164
165   AttrBuilder attrBuilder;
166   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
167   AttributeSet attrSet = AttributeSet::get(
168      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
169
170   std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
171                              PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
172   FunctionType *vsFuncType =
173      FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
174
175   // create new vertex shader function
176   auto pFunction = Function::Create(vsFuncType,
177                                     GlobalValue::ExternalLinkage,
178                                     "VS",
179                                     JM()->mpCurrentModule);
180   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
181
182   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
183   IRB()->SetInsertPoint(block);
184   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
185
186   auto argitr = pFunction->arg_begin();
187   Value *hPrivateData = &*argitr++;
188   hPrivateData->setName("hPrivateData");
189   Value *pVsCtx = &*argitr++;
190   pVsCtx->setName("vsCtx");
191
192   Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
193
194   consts_ptr->setName("vs_constants");
195   Value *const_sizes_ptr =
196      GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
197   const_sizes_ptr->setName("num_vs_constants");
198
199   Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
200
201   for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
202      const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
203      for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
204         if (mask & (1 << channel)) {
205            inputs[attrib][channel] =
206               wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
207         }
208      }
209   }
210
211   struct lp_build_sampler_soa *sampler =
212      swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
213
214   struct lp_bld_tgsi_system_values system_values;
215   memset(&system_values, 0, sizeof(system_values));
216   system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
217   system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
218
219   lp_build_tgsi_soa(gallivm,
220                     swr_vs->pipe.tokens,
221                     lp_type_float_vec(32, 32 * 8),
222                     NULL, // mask
223                     wrap(consts_ptr),
224                     wrap(const_sizes_ptr),
225                     &system_values,
226                     inputs,
227                     outputs,
228                     wrap(hPrivateData), // (sampler context)
229                     NULL, // thread data
230                     sampler, // sampler
231                     &swr_vs->info.base,
232                     NULL); // geometry shader face
233
234   sampler->destroy(sampler);
235
236   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
237
238   Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
239
240   for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
241      for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
242         if (!outputs[attrib][channel])
243            continue;
244
245         Value *val = LOAD(unwrap(outputs[attrib][channel]));
246
247         uint32_t outSlot = attrib;
248         if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
249            outSlot = VERTEX_POINT_SIZE_SLOT;
250         STORE(val, vtxOutput, {0, 0, outSlot, channel});
251      }
252   }
253
254   if (ctx->rasterizer->clip_plane_enable ||
255       swr_vs->info.base.culldist_writemask) {
256      unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
257
258      unsigned cv = 0;
259      if (swr_vs->info.base.writes_clipvertex) {
260         cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
261                                 &swr_vs->info.base);
262      } else {
263         for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
264            if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
265                swr_vs->info.base.output_semantic_index[i] == 0) {
266               cv = i;
267               break;
268            }
269         }
270      }
271      LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
272      LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
273      LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
274      LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
275
276      for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
277         // clip distance overrides user clip planes
278         if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
279             ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
280            unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
281                                             &swr_vs->info.base);
282            if (val < 4) {
283               LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
284               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
285            } else {
286               LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
287               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
288            }
289            continue;
290         }
291
292         if (!(clip_mask & (1 << val)))
293            continue;
294
295         Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
296         Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
297         Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
298         Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
299         Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
300                            FADD(FMUL(unwrap(cy), VBROADCAST(py)),
301                                 FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
302                                      FMUL(unwrap(cw), VBROADCAST(pw)))));
303
304         if (val < 4)
305            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
306         else
307            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
308      }
309   }
310
311   RET_VOID();
312
313   gallivm_verify_function(gallivm, wrap(pFunction));
314   gallivm_compile_module(gallivm);
315
316   //   lp_debug_dump_value(func);
317
318   PFN_VERTEX_FUNC pFunc =
319      (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
320
321   debug_printf("vert shader  %p\n", pFunc);
322   assert(pFunc && "Error: VertShader = NULL");
323
324#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5)
325   JM()->mIsModuleFinalized = true;
326#endif
327
328   return pFunc;
329}
330
331PFN_VERTEX_FUNC
332swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
333{
334   BuilderSWR builder(
335      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
336      "VS");
337   PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
338
339   ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func)));
340   return func;
341}
342
343static unsigned
344locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
345{
346   for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
347      if ((info->output_semantic_name[i] == name)
348          && (info->output_semantic_index[i] == index)) {
349         return i - 1; // position is not part of the linkage
350      }
351   }
352
353   if (name == TGSI_SEMANTIC_COLOR) { // BCOLOR fallback
354      for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
355         if ((info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR)
356             && (info->output_semantic_index[i] == index)) {
357            return i - 1; // position is not part of the linkage
358         }
359      }
360   }
361
362   return 0xFFFFFFFF;
363}
364
365PFN_PIXEL_KERNEL
366BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
367{
368   struct swr_fragment_shader *swr_fs = ctx->fs;
369
370   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
371   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
372
373   memset(inputs, 0, sizeof(inputs));
374   memset(outputs, 0, sizeof(outputs));
375
376   struct lp_build_sampler_soa *sampler = NULL;
377
378   AttrBuilder attrBuilder;
379   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
380   AttributeSet attrSet = AttributeSet::get(
381      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
382
383   std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
384                              PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
385   FunctionType *funcType =
386      FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
387
388   auto pFunction = Function::Create(funcType,
389                                     GlobalValue::ExternalLinkage,
390                                     "FS",
391                                     JM()->mpCurrentModule);
392   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
393
394   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
395   IRB()->SetInsertPoint(block);
396   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
397
398   auto args = pFunction->arg_begin();
399   Value *hPrivateData = &*args++;
400   hPrivateData->setName("hPrivateData");
401   Value *pPS = &*args++;
402   pPS->setName("psCtx");
403
404   Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
405   consts_ptr->setName("fs_constants");
406   Value *const_sizes_ptr =
407      GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
408   const_sizes_ptr->setName("num_fs_constants");
409
410   // load *pAttribs, *pPerspAttribs
411   Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
412   Value *pPerspAttribs =
413      LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
414
415   swr_fs->constantMask = 0;
416   swr_fs->flatConstantMask = 0;
417   swr_fs->pointSpriteMask = 0;
418
419   for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
420      const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
421      const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
422      const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
423
424      if (!mask)
425         continue;
426
427      // load i,j
428      Value *vi = nullptr, *vj = nullptr;
429      switch (interpLoc) {
430      case TGSI_INTERPOLATE_LOC_CENTER:
431         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
432         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
433         break;
434      case TGSI_INTERPOLATE_LOC_CENTROID:
435         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
436         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
437         break;
438      case TGSI_INTERPOLATE_LOC_SAMPLE:
439         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
440         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
441         break;
442      }
443
444      // load/compute w
445      Value *vw = nullptr, *pAttribs;
446      if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) {
447         pAttribs = pPerspAttribs;
448         switch (interpLoc) {
449         case TGSI_INTERPOLATE_LOC_CENTER:
450            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
451            break;
452         case TGSI_INTERPOLATE_LOC_CENTROID:
453            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
454            break;
455         case TGSI_INTERPOLATE_LOC_SAMPLE:
456            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
457            break;
458         }
459      } else {
460         pAttribs = pRawAttribs;
461         vw = VIMMED1(1.f);
462      }
463
464      vw->setName("w");
465
466      ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
467      ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
468
469      if (semantic_name == TGSI_SEMANTIC_FACE) {
470         Value *ff =
471            UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
472         ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
473         ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
474
475         inputs[attrib][0] = wrap(ff);
476         inputs[attrib][1] = wrap(VIMMED1(0.0f));
477         inputs[attrib][2] = wrap(VIMMED1(0.0f));
478         inputs[attrib][3] = wrap(VIMMED1(1.0f));
479         continue;
480      } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
481         inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
482         inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
483         inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
484         inputs[attrib][3] =
485            wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
486         continue;
487      } else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
488         Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID");
489         inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID));
490         inputs[attrib][1] = wrap(VIMMED1(0));
491         inputs[attrib][2] = wrap(VIMMED1(0));
492         inputs[attrib][3] = wrap(VIMMED1(0));
493         continue;
494      }
495
496      unsigned linkedAttrib =
497         locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
498      if (linkedAttrib == 0xFFFFFFFF) {
499         // not found - check for point sprite
500         if (ctx->rasterizer->sprite_coord_enable) {
501            linkedAttrib = ctx->vs->info.base.num_outputs - 1;
502            swr_fs->pointSpriteMask |= (1 << linkedAttrib);
503         } else {
504            fprintf(stderr,
505                    "Missing %s[%d]\n",
506                    tgsi_semantic_names[semantic_name],
507                    semantic_idx);
508            assert(0 && "attribute linkage not found");
509         }
510      }
511
512      if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
513         swr_fs->constantMask |= 1 << linkedAttrib;
514      } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
515         swr_fs->flatConstantMask |= 1 << linkedAttrib;
516      }
517
518      for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
519         if (mask & (1 << channel)) {
520            Value *indexA = C(linkedAttrib * 12 + channel);
521            Value *indexB = C(linkedAttrib * 12 + channel + 4);
522            Value *indexC = C(linkedAttrib * 12 + channel + 8);
523
524            if ((semantic_name == TGSI_SEMANTIC_COLOR)
525                && ctx->rasterizer->light_twoside) {
526               unsigned bcolorAttrib = locate_linkage(
527                  TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base);
528
529               unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
530
531               Value *back =
532                  XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
533
534               Value *offset = MUL(back, C(diff));
535               offset->setName("offset");
536
537               indexA = ADD(indexA, offset);
538               indexB = ADD(indexB, offset);
539               indexC = ADD(indexC, offset);
540
541               if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
542                  swr_fs->constantMask |= 1 << bcolorAttrib;
543               } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
544                  swr_fs->flatConstantMask |= 1 << bcolorAttrib;
545               }
546            }
547
548            Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
549            Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
550            Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
551
552            if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
553               inputs[attrib][channel] = wrap(va);
554            } else {
555               Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
556
557               vc = FMUL(vk, vc);
558
559               Value *interp = FMUL(va, vi);
560               Value *interp1 = FMUL(vb, vj);
561               interp = FADD(interp, interp1);
562               interp = FADD(interp, vc);
563               if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE)
564                  interp = FMUL(interp, vw);
565               inputs[attrib][channel] = wrap(interp);
566            }
567         }
568      }
569   }
570
571   sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
572
573   struct lp_bld_tgsi_system_values system_values;
574   memset(&system_values, 0, sizeof(system_values));
575
576   struct lp_build_mask_context mask;
577
578   if (swr_fs->info.base.uses_kill) {
579      Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
580      lp_build_mask_begin(
581         &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
582   }
583
584   lp_build_tgsi_soa(gallivm,
585                     swr_fs->pipe.tokens,
586                     lp_type_float_vec(32, 32 * 8),
587                     swr_fs->info.base.uses_kill ? &mask : NULL, // mask
588                     wrap(consts_ptr),
589                     wrap(const_sizes_ptr),
590                     &system_values,
591                     inputs,
592                     outputs,
593                     wrap(hPrivateData),
594                     NULL, // thread data
595                     sampler, // sampler
596                     &swr_fs->info.base,
597                     NULL); // geometry shader face
598
599   sampler->destroy(sampler);
600
601   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
602
603   for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
604        attrib++) {
605      switch (swr_fs->info.base.output_semantic_name[attrib]) {
606      case TGSI_SEMANTIC_POSITION: {
607         // write z
608         LLVMValueRef outZ =
609            LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
610         STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
611         break;
612      }
613      case TGSI_SEMANTIC_COLOR: {
614         for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
615            if (!outputs[attrib][channel])
616               continue;
617
618            LLVMValueRef out =
619               LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
620            if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
621               for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
622                  STORE(unwrap(out),
623                        pPS,
624                        {0, SWR_PS_CONTEXT_shaded, rt, channel});
625               }
626            } else {
627               STORE(unwrap(out),
628                     pPS,
629                     {0,
630                           SWR_PS_CONTEXT_shaded,
631                           swr_fs->info.base.output_semantic_index[attrib],
632                           channel});
633            }
634         }
635         break;
636      }
637      default: {
638         fprintf(stderr,
639                 "unknown output from FS %s[%d]\n",
640                 tgsi_semantic_names[swr_fs->info.base
641                                        .output_semantic_name[attrib]],
642                 swr_fs->info.base.output_semantic_index[attrib]);
643         break;
644      }
645      }
646   }
647
648   LLVMValueRef mask_result = 0;
649   if (swr_fs->info.base.uses_kill) {
650      mask_result = lp_build_mask_end(&mask);
651   }
652
653   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
654
655   if (swr_fs->info.base.uses_kill) {
656      STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
657   }
658
659   RET_VOID();
660
661   gallivm_verify_function(gallivm, wrap(pFunction));
662
663   gallivm_compile_module(gallivm);
664
665   PFN_PIXEL_KERNEL kernel =
666      (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
667   debug_printf("frag shader  %p\n", kernel);
668   assert(kernel && "Error: FragShader = NULL");
669
670#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5)
671   JM()->mIsModuleFinalized = true;
672#endif
673
674   return kernel;
675}
676
677PFN_PIXEL_KERNEL
678swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
679{
680   BuilderSWR builder(
681      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
682      "FS");
683   PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
684
685   ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func)));
686   return func;
687}
688