1/****************************************************************************
2 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24// llvm redefines DEBUG
25#pragma push_macro("DEBUG")
26#undef DEBUG
27#include "JitManager.h"
28#include "llvm-c/Core.h"
29#include "llvm/Support/CBindingWrapping.h"
30#pragma pop_macro("DEBUG")
31
32#include "state.h"
33#include "state_llvm.h"
34#include "builder.h"
35
36#include "tgsi/tgsi_strings.h"
37#include "util/u_format.h"
38#include "gallivm/lp_bld_init.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_struct.h"
41#include "gallivm/lp_bld_tgsi.h"
42
43#include "swr_context.h"
44#include "swr_context_llvm.h"
45#include "swr_resource.h"
46#include "swr_state.h"
47#include "swr_screen.h"
48
49using namespace SwrJit;
50
51static unsigned
52locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
53
54bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
55{
56   return !memcmp(&lhs, &rhs, sizeof(lhs));
57}
58
59bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
60{
61   return !memcmp(&lhs, &rhs, sizeof(lhs));
62}
63
64static void
65swr_generate_sampler_key(const struct lp_tgsi_info &info,
66                         struct swr_context *ctx,
67                         unsigned shader_type,
68                         struct swr_jit_sampler_key &key)
69{
70   key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
71
72   for (unsigned i = 0; i < key.nr_samplers; i++) {
73      if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
74         lp_sampler_static_sampler_state(
75            &key.sampler[i].sampler_state,
76            ctx->samplers[shader_type][i]);
77      }
78   }
79
80   /*
81    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
82    * are dx10-style? Can't really have mixed opcodes, at least not
83    * if we want to skip the holes here (without rescanning tgsi).
84    */
85   if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
86      key.nr_sampler_views =
87         info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
88      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
89         if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
90            const struct pipe_sampler_view *view =
91               ctx->sampler_views[shader_type][i];
92            lp_sampler_static_texture_state(
93               &key.sampler[i].texture_state, view);
94            if (view) {
95               struct swr_resource *swr_res = swr_resource(view->texture);
96               const struct util_format_description *desc =
97                  util_format_description(view->format);
98               if (swr_res->has_depth && swr_res->has_stencil &&
99                   !util_format_has_depth(desc))
100                  key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
101            }
102         }
103      }
104   } else {
105      key.nr_sampler_views = key.nr_samplers;
106      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
107         if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
108            const struct pipe_sampler_view *view =
109               ctx->sampler_views[shader_type][i];
110            lp_sampler_static_texture_state(
111               &key.sampler[i].texture_state, view);
112            if (view) {
113               struct swr_resource *swr_res = swr_resource(view->texture);
114               const struct util_format_description *desc =
115                  util_format_description(view->format);
116               if (swr_res->has_depth && swr_res->has_stencil &&
117                   !util_format_has_depth(desc))
118                  key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
119            }
120         }
121      }
122   }
123}
124
125void
126swr_generate_fs_key(struct swr_jit_fs_key &key,
127                    struct swr_context *ctx,
128                    swr_fragment_shader *swr_fs)
129{
130   memset(&key, 0, sizeof(key));
131
132   key.nr_cbufs = ctx->framebuffer.nr_cbufs;
133   key.light_twoside = ctx->rasterizer->light_twoside;
134   key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
135   memcpy(&key.vs_output_semantic_name,
136          &ctx->vs->info.base.output_semantic_name,
137          sizeof(key.vs_output_semantic_name));
138   memcpy(&key.vs_output_semantic_idx,
139          &ctx->vs->info.base.output_semantic_index,
140          sizeof(key.vs_output_semantic_idx));
141
142   swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
143}
144
145void
146swr_generate_vs_key(struct swr_jit_vs_key &key,
147                    struct swr_context *ctx,
148                    swr_vertex_shader *swr_vs)
149{
150   memset(&key, 0, sizeof(key));
151
152   key.clip_plane_mask =
153      swr_vs->info.base.clipdist_writemask ?
154      swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
155      ctx->rasterizer->clip_plane_enable;
156
157   swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
158}
159
160struct BuilderSWR : public Builder {
161   BuilderSWR(JitManager *pJitMgr, const char *pName)
162      : Builder(pJitMgr)
163   {
164      pJitMgr->SetupNewModule();
165      gallivm = gallivm_create(pName, wrap(&JM()->mContext));
166      pJitMgr->mpCurrentModule = unwrap(gallivm->module);
167   }
168
169   ~BuilderSWR() {
170      gallivm_free_ir(gallivm);
171   }
172
173   struct gallivm_state *gallivm;
174   PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
175   PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
176};
177
178PFN_VERTEX_FUNC
179BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
180{
181   struct swr_vertex_shader *swr_vs = ctx->vs;
182
183   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
184   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
185
186   memset(outputs, 0, sizeof(outputs));
187
188   AttrBuilder attrBuilder;
189   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
190   AttributeSet attrSet = AttributeSet::get(
191      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
192
193   std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
194                              PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
195   FunctionType *vsFuncType =
196      FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
197
198   // create new vertex shader function
199   auto pFunction = Function::Create(vsFuncType,
200                                     GlobalValue::ExternalLinkage,
201                                     "VS",
202                                     JM()->mpCurrentModule);
203   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
204
205   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
206   IRB()->SetInsertPoint(block);
207   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
208
209   auto argitr = pFunction->arg_begin();
210   Value *hPrivateData = &*argitr++;
211   hPrivateData->setName("hPrivateData");
212   Value *pVsCtx = &*argitr++;
213   pVsCtx->setName("vsCtx");
214
215   Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
216
217   consts_ptr->setName("vs_constants");
218   Value *const_sizes_ptr =
219      GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
220   const_sizes_ptr->setName("num_vs_constants");
221
222   Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
223
224   for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
225      const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
226      for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
227         if (mask & (1 << channel)) {
228            inputs[attrib][channel] =
229               wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
230         }
231      }
232   }
233
234   struct lp_build_sampler_soa *sampler =
235      swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
236
237   struct lp_bld_tgsi_system_values system_values;
238   memset(&system_values, 0, sizeof(system_values));
239   system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
240   system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
241
242   lp_build_tgsi_soa(gallivm,
243                     swr_vs->pipe.tokens,
244                     lp_type_float_vec(32, 32 * 8),
245                     NULL, // mask
246                     wrap(consts_ptr),
247                     wrap(const_sizes_ptr),
248                     &system_values,
249                     inputs,
250                     outputs,
251                     wrap(hPrivateData), // (sampler context)
252                     NULL, // thread data
253                     sampler, // sampler
254                     &swr_vs->info.base,
255                     NULL); // geometry shader face
256
257   sampler->destroy(sampler);
258
259   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
260
261   Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
262
263   for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
264      for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
265         if (!outputs[attrib][channel])
266            continue;
267
268         Value *val = LOAD(unwrap(outputs[attrib][channel]));
269
270         uint32_t outSlot = attrib;
271         if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
272            outSlot = VERTEX_POINT_SIZE_SLOT;
273         STORE(val, vtxOutput, {0, 0, outSlot, channel});
274      }
275   }
276
277   if (ctx->rasterizer->clip_plane_enable ||
278       swr_vs->info.base.culldist_writemask) {
279      unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
280
281      unsigned cv = 0;
282      if (swr_vs->info.base.writes_clipvertex) {
283         cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
284                                 &swr_vs->info.base);
285      } else {
286         for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
287            if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
288                swr_vs->info.base.output_semantic_index[i] == 0) {
289               cv = i;
290               break;
291            }
292         }
293      }
294      LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
295      LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
296      LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
297      LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
298
299      for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
300         // clip distance overrides user clip planes
301         if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
302             ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
303            unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
304                                             &swr_vs->info.base);
305            if (val < 4) {
306               LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
307               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
308            } else {
309               LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
310               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
311            }
312            continue;
313         }
314
315         if (!(clip_mask & (1 << val)))
316            continue;
317
318         Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
319         Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
320         Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
321         Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
322         Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
323                            FADD(FMUL(unwrap(cy), VBROADCAST(py)),
324                                 FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
325                                      FMUL(unwrap(cw), VBROADCAST(pw)))));
326
327         if (val < 4)
328            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
329         else
330            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
331      }
332   }
333
334   RET_VOID();
335
336   gallivm_verify_function(gallivm, wrap(pFunction));
337   gallivm_compile_module(gallivm);
338
339   //   lp_debug_dump_value(func);
340
341   PFN_VERTEX_FUNC pFunc =
342      (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
343
344   debug_printf("vert shader  %p\n", pFunc);
345   assert(pFunc && "Error: VertShader = NULL");
346
347   JM()->mIsModuleFinalized = true;
348
349   return pFunc;
350}
351
352PFN_VERTEX_FUNC
353swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
354{
355   BuilderSWR builder(
356      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
357      "VS");
358   PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
359
360   ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func)));
361   return func;
362}
363
364static unsigned
365locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
366{
367   for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
368      if ((info->output_semantic_name[i] == name)
369          && (info->output_semantic_index[i] == index)) {
370         return i - 1; // position is not part of the linkage
371      }
372   }
373
374   return 0xFFFFFFFF;
375}
376
377PFN_PIXEL_KERNEL
378BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
379{
380   struct swr_fragment_shader *swr_fs = ctx->fs;
381
382   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
383   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
384
385   memset(inputs, 0, sizeof(inputs));
386   memset(outputs, 0, sizeof(outputs));
387
388   struct lp_build_sampler_soa *sampler = NULL;
389
390   AttrBuilder attrBuilder;
391   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
392   AttributeSet attrSet = AttributeSet::get(
393      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
394
395   std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
396                              PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
397   FunctionType *funcType =
398      FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
399
400   auto pFunction = Function::Create(funcType,
401                                     GlobalValue::ExternalLinkage,
402                                     "FS",
403                                     JM()->mpCurrentModule);
404   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
405
406   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
407   IRB()->SetInsertPoint(block);
408   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
409
410   auto args = pFunction->arg_begin();
411   Value *hPrivateData = &*args++;
412   hPrivateData->setName("hPrivateData");
413   Value *pPS = &*args++;
414   pPS->setName("psCtx");
415
416   Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
417   consts_ptr->setName("fs_constants");
418   Value *const_sizes_ptr =
419      GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
420   const_sizes_ptr->setName("num_fs_constants");
421
422   // load *pAttribs, *pPerspAttribs
423   Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
424   Value *pPerspAttribs =
425      LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
426
427   swr_fs->constantMask = 0;
428   swr_fs->flatConstantMask = 0;
429   swr_fs->pointSpriteMask = 0;
430
431   for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
432      const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
433      const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
434      const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
435
436      if (!mask)
437         continue;
438
439      // load i,j
440      Value *vi = nullptr, *vj = nullptr;
441      switch (interpLoc) {
442      case TGSI_INTERPOLATE_LOC_CENTER:
443         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
444         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
445         break;
446      case TGSI_INTERPOLATE_LOC_CENTROID:
447         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
448         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
449         break;
450      case TGSI_INTERPOLATE_LOC_SAMPLE:
451         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
452         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
453         break;
454      }
455
456      // load/compute w
457      Value *vw = nullptr, *pAttribs;
458      if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
459          interpMode == TGSI_INTERPOLATE_COLOR) {
460         pAttribs = pPerspAttribs;
461         switch (interpLoc) {
462         case TGSI_INTERPOLATE_LOC_CENTER:
463            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
464            break;
465         case TGSI_INTERPOLATE_LOC_CENTROID:
466            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
467            break;
468         case TGSI_INTERPOLATE_LOC_SAMPLE:
469            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
470            break;
471         }
472      } else {
473         pAttribs = pRawAttribs;
474         vw = VIMMED1(1.f);
475      }
476
477      vw->setName("w");
478
479      ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
480      ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
481
482      if (semantic_name == TGSI_SEMANTIC_FACE) {
483         Value *ff =
484            UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
485         ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
486         ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
487
488         inputs[attrib][0] = wrap(ff);
489         inputs[attrib][1] = wrap(VIMMED1(0.0f));
490         inputs[attrib][2] = wrap(VIMMED1(0.0f));
491         inputs[attrib][3] = wrap(VIMMED1(1.0f));
492         continue;
493      } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
494         if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
495             TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
496            inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
497            inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
498         } else {
499            inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
500            inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
501         }
502         inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
503         inputs[attrib][3] =
504            wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
505         continue;
506      } else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
507         Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID");
508         inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID));
509         inputs[attrib][1] = wrap(VIMMED1(0));
510         inputs[attrib][2] = wrap(VIMMED1(0));
511         inputs[attrib][3] = wrap(VIMMED1(0));
512         continue;
513      }
514
515      unsigned linkedAttrib =
516         locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
517      if (semantic_name == TGSI_SEMANTIC_GENERIC &&
518          key.sprite_coord_enable & (1 << semantic_idx)) {
519         /* we add an extra attrib to the backendState in swr_update_derived. */
520         linkedAttrib = ctx->vs->info.base.num_outputs - 1;
521         swr_fs->pointSpriteMask |= (1 << linkedAttrib);
522      } else if (linkedAttrib == 0xFFFFFFFF) {
523         inputs[attrib][0] = wrap(VIMMED1(0.0f));
524         inputs[attrib][1] = wrap(VIMMED1(0.0f));
525         inputs[attrib][2] = wrap(VIMMED1(0.0f));
526         inputs[attrib][3] = wrap(VIMMED1(1.0f));
527         /* If we're reading in color and 2-sided lighting is enabled, we have
528          * to keep going.
529          */
530         if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
531            continue;
532      } else {
533         if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
534            swr_fs->constantMask |= 1 << linkedAttrib;
535         } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
536            swr_fs->flatConstantMask |= 1 << linkedAttrib;
537         }
538      }
539
540      unsigned bcolorAttrib = 0xFFFFFFFF;
541      Value *offset = NULL;
542      if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
543         bcolorAttrib = locate_linkage(
544               TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base);
545         /* Neither front nor back colors were available. Nothing to load. */
546         if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
547            continue;
548         /* If there is no front color, just always use the back color. */
549         if (linkedAttrib == 0xFFFFFFFF)
550            linkedAttrib = bcolorAttrib;
551
552         if (bcolorAttrib != 0xFFFFFFFF) {
553            if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
554               swr_fs->constantMask |= 1 << bcolorAttrib;
555            } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
556               swr_fs->flatConstantMask |= 1 << bcolorAttrib;
557            }
558
559            unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
560
561            if (diff) {
562               Value *back =
563                  XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
564
565               offset = MUL(back, C(diff));
566               offset->setName("offset");
567            }
568         }
569      }
570
571      for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
572         if (mask & (1 << channel)) {
573            Value *indexA = C(linkedAttrib * 12 + channel);
574            Value *indexB = C(linkedAttrib * 12 + channel + 4);
575            Value *indexC = C(linkedAttrib * 12 + channel + 8);
576
577            if (offset) {
578               indexA = ADD(indexA, offset);
579               indexB = ADD(indexB, offset);
580               indexC = ADD(indexC, offset);
581            }
582
583            Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
584            Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
585            Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
586
587            if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
588               inputs[attrib][channel] = wrap(va);
589            } else {
590               Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
591
592               vc = FMUL(vk, vc);
593
594               Value *interp = FMUL(va, vi);
595               Value *interp1 = FMUL(vb, vj);
596               interp = FADD(interp, interp1);
597               interp = FADD(interp, vc);
598               if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
599                   interpMode == TGSI_INTERPOLATE_COLOR)
600                  interp = FMUL(interp, vw);
601               inputs[attrib][channel] = wrap(interp);
602            }
603         }
604      }
605   }
606
607   sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
608
609   struct lp_bld_tgsi_system_values system_values;
610   memset(&system_values, 0, sizeof(system_values));
611
612   struct lp_build_mask_context mask;
613
614   if (swr_fs->info.base.uses_kill) {
615      Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
616      lp_build_mask_begin(
617         &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
618   }
619
620   lp_build_tgsi_soa(gallivm,
621                     swr_fs->pipe.tokens,
622                     lp_type_float_vec(32, 32 * 8),
623                     swr_fs->info.base.uses_kill ? &mask : NULL, // mask
624                     wrap(consts_ptr),
625                     wrap(const_sizes_ptr),
626                     &system_values,
627                     inputs,
628                     outputs,
629                     wrap(hPrivateData),
630                     NULL, // thread data
631                     sampler, // sampler
632                     &swr_fs->info.base,
633                     NULL); // geometry shader face
634
635   sampler->destroy(sampler);
636
637   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
638
639   for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
640        attrib++) {
641      switch (swr_fs->info.base.output_semantic_name[attrib]) {
642      case TGSI_SEMANTIC_POSITION: {
643         // write z
644         LLVMValueRef outZ =
645            LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
646         STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
647         break;
648      }
649      case TGSI_SEMANTIC_COLOR: {
650         for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
651            if (!outputs[attrib][channel])
652               continue;
653
654            LLVMValueRef out =
655               LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
656            if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
657                swr_fs->info.base.output_semantic_index[attrib] == 0) {
658               for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
659                  STORE(unwrap(out),
660                        pPS,
661                        {0, SWR_PS_CONTEXT_shaded, rt, channel});
662               }
663            } else {
664               STORE(unwrap(out),
665                     pPS,
666                     {0,
667                           SWR_PS_CONTEXT_shaded,
668                           swr_fs->info.base.output_semantic_index[attrib],
669                           channel});
670            }
671         }
672         break;
673      }
674      default: {
675         fprintf(stderr,
676                 "unknown output from FS %s[%d]\n",
677                 tgsi_semantic_names[swr_fs->info.base
678                                        .output_semantic_name[attrib]],
679                 swr_fs->info.base.output_semantic_index[attrib]);
680         break;
681      }
682      }
683   }
684
685   LLVMValueRef mask_result = 0;
686   if (swr_fs->info.base.uses_kill) {
687      mask_result = lp_build_mask_end(&mask);
688   }
689
690   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
691
692   if (swr_fs->info.base.uses_kill) {
693      STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
694   }
695
696   RET_VOID();
697
698   gallivm_verify_function(gallivm, wrap(pFunction));
699
700   gallivm_compile_module(gallivm);
701
702   PFN_PIXEL_KERNEL kernel =
703      (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
704   debug_printf("frag shader  %p\n", kernel);
705   assert(kernel && "Error: FragShader = NULL");
706
707   JM()->mIsModuleFinalized = true;
708
709   return kernel;
710}
711
712PFN_PIXEL_KERNEL
713swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
714{
715   BuilderSWR builder(
716      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
717      "FS");
718   PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
719
720   ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func)));
721   return func;
722}
723