1/****************************************************************************
2 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24// llvm redefines DEBUG
25#pragma push_macro("DEBUG")
26#undef DEBUG
27#include "JitManager.h"
28#pragma pop_macro("DEBUG")
29
30#include "common/os.h"
31#include "jit_api.h"
32#include "state_llvm.h"
33
34#include "gallivm/lp_bld_tgsi.h"
35#include "util/u_format.h"
36
37#include "util/u_memory.h"
38#include "util/u_inlines.h"
39#include "util/u_helpers.h"
40#include "util/u_framebuffer.h"
41#include "util/u_viewport.h"
42
43#include "swr_state.h"
44#include "swr_context.h"
45#include "swr_context_llvm.h"
46#include "swr_screen.h"
47#include "swr_resource.h"
48#include "swr_tex_sample.h"
49#include "swr_scratch.h"
50#include "swr_shader.h"
51#include "swr_fence.h"
52
53/* These should be pulled out into separate files as necessary
54 * Just initializing everything here to get going. */
55
56static void *
57swr_create_blend_state(struct pipe_context *pipe,
58                       const struct pipe_blend_state *blend)
59{
60   struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
61
62   memcpy(&state->pipe, blend, sizeof(*blend));
63
64   struct pipe_blend_state *pipe_blend = &state->pipe;
65
66   for (int target = 0;
67        target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
68        target++) {
69
70      struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
71      SWR_RENDER_TARGET_BLEND_STATE &blendState =
72         state->blendState.renderTarget[target];
73      RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
74         state->compileState[target];
75
76      if (target != 0 && !pipe_blend->independent_blend_enable) {
77         memcpy(&compileState,
78                &state->compileState[0],
79                sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
80         continue;
81      }
82
83      compileState.blendEnable = rt_blend->blend_enable;
84      if (compileState.blendEnable) {
85         compileState.sourceAlphaBlendFactor =
86            swr_convert_blend_factor(rt_blend->alpha_src_factor);
87         compileState.destAlphaBlendFactor =
88            swr_convert_blend_factor(rt_blend->alpha_dst_factor);
89         compileState.sourceBlendFactor =
90            swr_convert_blend_factor(rt_blend->rgb_src_factor);
91         compileState.destBlendFactor =
92            swr_convert_blend_factor(rt_blend->rgb_dst_factor);
93
94         compileState.colorBlendFunc =
95            swr_convert_blend_func(rt_blend->rgb_func);
96         compileState.alphaBlendFunc =
97            swr_convert_blend_func(rt_blend->alpha_func);
98      }
99      compileState.logicOpEnable = state->pipe.logicop_enable;
100      if (compileState.logicOpEnable) {
101         compileState.logicOpFunc =
102            swr_convert_logic_op(state->pipe.logicop_func);
103      }
104
105      blendState.writeDisableRed =
106         (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
107      blendState.writeDisableGreen =
108         (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
109      blendState.writeDisableBlue =
110         (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
111      blendState.writeDisableAlpha =
112         (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
113
114      if (rt_blend->colormask == 0)
115         compileState.blendEnable = false;
116   }
117
118   return state;
119}
120
121static void
122swr_bind_blend_state(struct pipe_context *pipe, void *blend)
123{
124   struct swr_context *ctx = swr_context(pipe);
125
126   if (ctx->blend == blend)
127      return;
128
129   ctx->blend = (swr_blend_state *)blend;
130
131   ctx->dirty |= SWR_NEW_BLEND;
132}
133
134static void
135swr_delete_blend_state(struct pipe_context *pipe, void *blend)
136{
137   FREE(blend);
138}
139
140static void
141swr_set_blend_color(struct pipe_context *pipe,
142                    const struct pipe_blend_color *color)
143{
144   struct swr_context *ctx = swr_context(pipe);
145
146   ctx->blend_color = *color;
147
148   ctx->dirty |= SWR_NEW_BLEND;
149}
150
151static void
152swr_set_stencil_ref(struct pipe_context *pipe,
153                    const struct pipe_stencil_ref *ref)
154{
155   struct swr_context *ctx = swr_context(pipe);
156
157   ctx->stencil_ref = *ref;
158
159   ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
160}
161
162static void *
163swr_create_depth_stencil_state(
164   struct pipe_context *pipe,
165   const struct pipe_depth_stencil_alpha_state *depth_stencil)
166{
167   struct pipe_depth_stencil_alpha_state *state;
168
169   state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
170                                                     sizeof *depth_stencil);
171
172   return state;
173}
174
175static void
176swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
177{
178   struct swr_context *ctx = swr_context(pipe);
179
180   if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
181      return;
182
183   ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
184
185   ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
186}
187
188static void
189swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
190{
191   FREE(depth);
192}
193
194
195static void *
196swr_create_rasterizer_state(struct pipe_context *pipe,
197                            const struct pipe_rasterizer_state *rast)
198{
199   struct pipe_rasterizer_state *state;
200   state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
201
202   return state;
203}
204
205static void
206swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
207{
208   struct swr_context *ctx = swr_context(pipe);
209   const struct pipe_rasterizer_state *rasterizer =
210      (const struct pipe_rasterizer_state *)handle;
211
212   if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
213      return;
214
215   ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
216
217   ctx->dirty |= SWR_NEW_RASTERIZER;
218}
219
220static void
221swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
222{
223   FREE(rasterizer);
224}
225
226
227static void *
228swr_create_sampler_state(struct pipe_context *pipe,
229                         const struct pipe_sampler_state *sampler)
230{
231   struct pipe_sampler_state *state =
232      (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
233
234   return state;
235}
236
237static void
238swr_bind_sampler_states(struct pipe_context *pipe,
239                        enum pipe_shader_type shader,
240                        unsigned start,
241                        unsigned num,
242                        void **samplers)
243{
244   struct swr_context *ctx = swr_context(pipe);
245   unsigned i;
246
247   assert(shader < PIPE_SHADER_TYPES);
248   assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
249
250   /* set the new samplers */
251   ctx->num_samplers[shader] = num;
252   for (i = 0; i < num; i++) {
253      ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
254   }
255
256   ctx->dirty |= SWR_NEW_SAMPLER;
257}
258
259static void
260swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
261{
262   FREE(sampler);
263}
264
265
266static struct pipe_sampler_view *
267swr_create_sampler_view(struct pipe_context *pipe,
268                        struct pipe_resource *texture,
269                        const struct pipe_sampler_view *templ)
270{
271   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
272
273   if (view) {
274      *view = *templ;
275      view->reference.count = 1;
276      view->texture = NULL;
277      pipe_resource_reference(&view->texture, texture);
278      view->context = pipe;
279   }
280
281   return view;
282}
283
284static void
285swr_set_sampler_views(struct pipe_context *pipe,
286                      enum pipe_shader_type shader,
287                      unsigned start,
288                      unsigned num,
289                      struct pipe_sampler_view **views)
290{
291   struct swr_context *ctx = swr_context(pipe);
292   uint i;
293
294   assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
295
296   assert(shader < PIPE_SHADER_TYPES);
297   assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
298
299   /* set the new sampler views */
300   ctx->num_sampler_views[shader] = num;
301   for (i = 0; i < num; i++) {
302      /* Note: we're using pipe_sampler_view_release() here to work around
303       * a possible crash when the old view belongs to another context that
304       * was already destroyed.
305       */
306      pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
307      pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
308                                  views[i]);
309   }
310
311   ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
312}
313
314static void
315swr_sampler_view_destroy(struct pipe_context *pipe,
316                         struct pipe_sampler_view *view)
317{
318   pipe_resource_reference(&view->texture, NULL);
319   FREE(view);
320}
321
322static void *
323swr_create_vs_state(struct pipe_context *pipe,
324                    const struct pipe_shader_state *vs)
325{
326   struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
327   if (!swr_vs)
328      return NULL;
329
330   swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
331   swr_vs->pipe.stream_output = vs->stream_output;
332
333   lp_build_tgsi_info(vs->tokens, &swr_vs->info);
334
335   swr_vs->soState = {0};
336
337   if (swr_vs->pipe.stream_output.num_outputs) {
338      pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
339
340      swr_vs->soState.soEnable = true;
341      // soState.rasterizerDisable set on state dirty
342      // soState.streamToRasterizer not used
343
344      for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
345         swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
346            1 << (stream_output->output[i].register_index - 1);
347      }
348      for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
349        swr_vs->soState.streamNumEntries[i] =
350             _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
351       }
352   }
353
354   return swr_vs;
355}
356
357static void
358swr_bind_vs_state(struct pipe_context *pipe, void *vs)
359{
360   struct swr_context *ctx = swr_context(pipe);
361
362   if (ctx->vs == vs)
363      return;
364
365   ctx->vs = (swr_vertex_shader *)vs;
366   ctx->dirty |= SWR_NEW_VS;
367}
368
369static void
370swr_delete_vs_state(struct pipe_context *pipe, void *vs)
371{
372   struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
373   FREE((void *)swr_vs->pipe.tokens);
374   struct swr_screen *screen = swr_screen(pipe->screen);
375
376   /* Defer deletion of vs state */
377   swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
378}
379
380static void *
381swr_create_fs_state(struct pipe_context *pipe,
382                    const struct pipe_shader_state *fs)
383{
384   struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
385   if (!swr_fs)
386      return NULL;
387
388   swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
389
390   lp_build_tgsi_info(fs->tokens, &swr_fs->info);
391
392   return swr_fs;
393}
394
395
396static void
397swr_bind_fs_state(struct pipe_context *pipe, void *fs)
398{
399   struct swr_context *ctx = swr_context(pipe);
400
401   if (ctx->fs == fs)
402      return;
403
404   ctx->fs = (swr_fragment_shader *)fs;
405   ctx->dirty |= SWR_NEW_FS;
406}
407
408static void
409swr_delete_fs_state(struct pipe_context *pipe, void *fs)
410{
411   struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
412   FREE((void *)swr_fs->pipe.tokens);
413   struct swr_screen *screen = swr_screen(pipe->screen);
414
415   /* Defer deleton of fs state */
416   swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
417}
418
419
420static void
421swr_set_constant_buffer(struct pipe_context *pipe,
422                        uint shader,
423                        uint index,
424                        const struct pipe_constant_buffer *cb)
425{
426   struct swr_context *ctx = swr_context(pipe);
427   struct pipe_resource *constants = cb ? cb->buffer : NULL;
428
429   assert(shader < PIPE_SHADER_TYPES);
430   assert(index < ARRAY_SIZE(ctx->constants[shader]));
431
432   /* note: reference counting */
433   util_copy_constant_buffer(&ctx->constants[shader][index], cb);
434
435   if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
436      ctx->dirty |= SWR_NEW_VSCONSTANTS;
437   } else if (shader == PIPE_SHADER_FRAGMENT) {
438      ctx->dirty |= SWR_NEW_FSCONSTANTS;
439   }
440
441   if (cb && cb->user_buffer) {
442      pipe_resource_reference(&constants, NULL);
443   }
444}
445
446
447static void *
448swr_create_vertex_elements_state(struct pipe_context *pipe,
449                                 unsigned num_elements,
450                                 const struct pipe_vertex_element *attribs)
451{
452   struct swr_vertex_element_state *velems;
453   assert(num_elements <= PIPE_MAX_ATTRIBS);
454   velems = CALLOC_STRUCT(swr_vertex_element_state);
455   if (velems) {
456      velems->fsState.bVertexIDOffsetEnable = true;
457      velems->fsState.numAttribs = num_elements;
458      for (unsigned i = 0; i < num_elements; i++) {
459         // XXX: we should do this keyed on the VS usage info
460
461         const struct util_format_description *desc =
462            util_format_description(attribs[i].src_format);
463
464         velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
465         velems->fsState.layout[i].Format =
466            mesa_to_swr_format(attribs[i].src_format);
467         velems->fsState.layout[i].StreamIndex =
468            attribs[i].vertex_buffer_index;
469         velems->fsState.layout[i].InstanceEnable =
470            attribs[i].instance_divisor != 0;
471         velems->fsState.layout[i].ComponentControl0 =
472            desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
473            ? ComponentControl::StoreSrc
474            : ComponentControl::Store0;
475         velems->fsState.layout[i].ComponentControl1 =
476            desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
477            ? ComponentControl::StoreSrc
478            : ComponentControl::Store0;
479         velems->fsState.layout[i].ComponentControl2 =
480            desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
481            ? ComponentControl::StoreSrc
482            : ComponentControl::Store0;
483         velems->fsState.layout[i].ComponentControl3 =
484            desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
485            ? ComponentControl::StoreSrc
486            : ComponentControl::Store1Fp;
487         velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
488         velems->fsState.layout[i].InstanceDataStepRate =
489            attribs[i].instance_divisor;
490
491         /* Calculate the pitch of each stream */
492         const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
493            mesa_to_swr_format(attribs[i].src_format));
494         velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
495
496         if (attribs[i].instance_divisor != 0) {
497            velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
498            uint32_t *min_instance_div =
499               &velems->min_instance_div[attribs[i].vertex_buffer_index];
500            if (!*min_instance_div ||
501                attribs[i].instance_divisor < *min_instance_div)
502               *min_instance_div = attribs[i].instance_divisor;
503         }
504      }
505   }
506
507   return velems;
508}
509
510static void
511swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
512{
513   struct swr_context *ctx = swr_context(pipe);
514   struct swr_vertex_element_state *swr_velems =
515      (struct swr_vertex_element_state *)velems;
516
517   ctx->velems = swr_velems;
518   ctx->dirty |= SWR_NEW_VERTEX;
519}
520
521static void
522swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
523{
524   /* XXX Need to destroy fetch shader? */
525   FREE(velems);
526}
527
528
529static void
530swr_set_vertex_buffers(struct pipe_context *pipe,
531                       unsigned start_slot,
532                       unsigned num_elements,
533                       const struct pipe_vertex_buffer *buffers)
534{
535   struct swr_context *ctx = swr_context(pipe);
536
537   assert(num_elements <= PIPE_MAX_ATTRIBS);
538
539   util_set_vertex_buffers_count(ctx->vertex_buffer,
540                                 &ctx->num_vertex_buffers,
541                                 buffers,
542                                 start_slot,
543                                 num_elements);
544
545   ctx->dirty |= SWR_NEW_VERTEX;
546}
547
548
549static void
550swr_set_index_buffer(struct pipe_context *pipe,
551                     const struct pipe_index_buffer *ib)
552{
553   struct swr_context *ctx = swr_context(pipe);
554
555   if (ib)
556      memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer));
557   else
558      memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer));
559
560   ctx->dirty |= SWR_NEW_VERTEX;
561}
562
563static void
564swr_set_polygon_stipple(struct pipe_context *pipe,
565                        const struct pipe_poly_stipple *stipple)
566{
567   struct swr_context *ctx = swr_context(pipe);
568
569   ctx->poly_stipple = *stipple; /* struct copy */
570   ctx->dirty |= SWR_NEW_STIPPLE;
571}
572
573static void
574swr_set_clip_state(struct pipe_context *pipe,
575                   const struct pipe_clip_state *clip)
576{
577   struct swr_context *ctx = swr_context(pipe);
578
579   ctx->clip = *clip;
580   /* XXX Unimplemented, but prevents crash */
581
582   ctx->dirty |= SWR_NEW_CLIP;
583}
584
585
586static void
587swr_set_scissor_states(struct pipe_context *pipe,
588                       unsigned start_slot,
589                       unsigned num_viewports,
590                       const struct pipe_scissor_state *scissor)
591{
592   struct swr_context *ctx = swr_context(pipe);
593
594   ctx->scissor = *scissor;
595   ctx->swr_scissor.xmin = scissor->minx;
596   ctx->swr_scissor.xmax = scissor->maxx;
597   ctx->swr_scissor.ymin = scissor->miny;
598   ctx->swr_scissor.ymax = scissor->maxy;
599   ctx->dirty |= SWR_NEW_SCISSOR;
600}
601
602static void
603swr_set_viewport_states(struct pipe_context *pipe,
604                        unsigned start_slot,
605                        unsigned num_viewports,
606                        const struct pipe_viewport_state *vpt)
607{
608   struct swr_context *ctx = swr_context(pipe);
609
610   ctx->viewport = *vpt;
611   ctx->dirty |= SWR_NEW_VIEWPORT;
612}
613
614
615static void
616swr_set_framebuffer_state(struct pipe_context *pipe,
617                          const struct pipe_framebuffer_state *fb)
618{
619   struct swr_context *ctx = swr_context(pipe);
620
621   boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
622
623   assert(fb->width <= KNOB_GUARDBAND_WIDTH);
624   assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
625
626   if (changed) {
627      util_copy_framebuffer_state(&ctx->framebuffer, fb);
628
629      ctx->dirty |= SWR_NEW_FRAMEBUFFER;
630   }
631}
632
633
634static void
635swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
636{
637   struct swr_context *ctx = swr_context(pipe);
638
639   if (sample_mask != ctx->sample_mask) {
640      ctx->sample_mask = sample_mask;
641      ctx->dirty |= SWR_NEW_RASTERIZER;
642   }
643}
644
645/*
646 * Update resource in-use status
647 * All resources bound to color or depth targets marked as WRITE resources.
648 * VBO Vertex/index buffers and texture views marked as READ resources.
649 */
650void
651swr_update_resource_status(struct pipe_context *pipe,
652                           const struct pipe_draw_info *p_draw_info)
653{
654   struct swr_context *ctx = swr_context(pipe);
655   struct pipe_framebuffer_state *fb = &ctx->framebuffer;
656
657   /* colorbuffer targets */
658   if (fb->nr_cbufs)
659      for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
660         if (fb->cbufs[i])
661            swr_resource_write(fb->cbufs[i]->texture);
662
663   /* depth/stencil target */
664   if (fb->zsbuf)
665      swr_resource_write(fb->zsbuf->texture);
666
667   /* VBO vertex buffers */
668   for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
669      struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
670      if (!vb->user_buffer)
671         swr_resource_read(vb->buffer);
672   }
673
674   /* VBO index buffer */
675   if (p_draw_info && p_draw_info->indexed) {
676      struct pipe_index_buffer *ib = &ctx->index_buffer;
677      if (!ib->user_buffer)
678         swr_resource_read(ib->buffer);
679   }
680
681   /* transform feedback buffers */
682   for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
683      struct pipe_stream_output_target *target = ctx->so_targets[i];
684      if (target && target->buffer)
685         swr_resource_write(target->buffer);
686   }
687
688   /* texture sampler views */
689   for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
690      for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
691         struct pipe_sampler_view *view = ctx->sampler_views[j][i];
692         if (view)
693            swr_resource_read(view->texture);
694      }
695   }
696
697   /* constant buffers */
698   for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
699      for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
700         struct pipe_constant_buffer *cb = &ctx->constants[j][i];
701         if (cb->buffer)
702            swr_resource_read(cb->buffer);
703      }
704   }
705}
706
707static void
708swr_update_texture_state(struct swr_context *ctx,
709                         unsigned shader_type,
710                         unsigned num_sampler_views,
711                         swr_jit_texture *textures)
712{
713   for (unsigned i = 0; i < num_sampler_views; i++) {
714      struct pipe_sampler_view *view =
715         ctx->sampler_views[shader_type][i];
716      struct swr_jit_texture *jit_tex = &textures[i];
717
718      memset(jit_tex, 0, sizeof(*jit_tex));
719      if (view) {
720         struct pipe_resource *res = view->texture;
721         struct swr_resource *swr_res = swr_resource(res);
722         SWR_SURFACE_STATE *swr = &swr_res->swr;
723         size_t *mip_offsets = swr_res->mip_offsets;
724         if (swr_res->has_depth && swr_res->has_stencil &&
725            !util_format_has_depth(util_format_description(view->format))) {
726            swr = &swr_res->secondary;
727            mip_offsets = swr_res->secondary_mip_offsets;
728         }
729
730         jit_tex->width = res->width0;
731         jit_tex->height = res->height0;
732         jit_tex->base_ptr = swr->pBaseAddress;
733         if (view->target != PIPE_BUFFER) {
734            jit_tex->first_level = view->u.tex.first_level;
735            jit_tex->last_level = view->u.tex.last_level;
736            if (view->target == PIPE_TEXTURE_3D)
737               jit_tex->depth = res->depth0;
738            else
739               jit_tex->depth =
740                  view->u.tex.last_layer - view->u.tex.first_layer + 1;
741            jit_tex->base_ptr += view->u.tex.first_layer *
742               swr->qpitch * swr->pitch;
743         } else {
744            unsigned view_blocksize = util_format_get_blocksize(view->format);
745            jit_tex->base_ptr += view->u.buf.offset;
746            jit_tex->width = view->u.buf.size / view_blocksize;
747            jit_tex->depth = 1;
748         }
749
750         for (unsigned level = jit_tex->first_level;
751              level <= jit_tex->last_level;
752              level++) {
753            jit_tex->row_stride[level] = swr->pitch;
754            jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
755            jit_tex->mip_offsets[level] = mip_offsets[level];
756         }
757      }
758   }
759}
760
761static void
762swr_update_sampler_state(struct swr_context *ctx,
763                         unsigned shader_type,
764                         unsigned num_samplers,
765                         swr_jit_sampler *samplers)
766{
767   for (unsigned i = 0; i < num_samplers; i++) {
768      const struct pipe_sampler_state *sampler =
769         ctx->samplers[shader_type][i];
770
771      if (sampler) {
772         samplers[i].min_lod = sampler->min_lod;
773         samplers[i].max_lod = sampler->max_lod;
774         samplers[i].lod_bias = sampler->lod_bias;
775         COPY_4V(samplers[i].border_color, sampler->border_color.f);
776      }
777   }
778}
779
780static void
781swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
782{
783   swr_draw_context *pDC = &ctx->swrDC;
784
785   const float **constant;
786   uint32_t *num_constants;
787   struct swr_scratch_space *scratch;
788
789   switch (shaderType) {
790   case PIPE_SHADER_VERTEX:
791      constant = pDC->constantVS;
792      num_constants = pDC->num_constantsVS;
793      scratch = &ctx->scratch->vs_constants;
794      break;
795   case PIPE_SHADER_FRAGMENT:
796      constant = pDC->constantFS;
797      num_constants = pDC->num_constantsFS;
798      scratch = &ctx->scratch->fs_constants;
799      break;
800   default:
801      debug_printf("Unsupported shader type constants\n");
802      return;
803   }
804
805   for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
806      const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
807      num_constants[i] = cb->buffer_size;
808      if (cb->buffer) {
809         constant[i] =
810            (const float *)(swr_resource_data(cb->buffer) +
811                            cb->buffer_offset);
812      } else {
813         /* Need to copy these constants to scratch space */
814         if (cb->user_buffer && cb->buffer_size) {
815            const void *ptr =
816               ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
817            uint32_t size = AlignUp(cb->buffer_size, 4);
818            ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
819            constant[i] = (const float *)ptr;
820         }
821      }
822   }
823}
824
825static bool
826swr_change_rt(struct swr_context *ctx,
827              unsigned attachment,
828              const struct pipe_surface *sf)
829{
830   swr_draw_context *pDC = &ctx->swrDC;
831   struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
832
833   /* Do nothing if the render target hasn't changed */
834   if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
835      return false;
836
837   /* Deal with disabling RT up front */
838   if (!sf || !sf->texture) {
839      /* If detaching attachment, mark tiles as RESOLVED so core
840       * won't try to load from non-existent target. */
841      swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
842      *rt = {0};
843      return true;
844   }
845
846   const struct swr_resource *swr = swr_resource(sf->texture);
847   const SWR_SURFACE_STATE *swr_surface = &swr->swr;
848   SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
849
850   if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
851      swr_surface = &swr->secondary;
852      fmt = swr_surface->format;
853   }
854
855   if (rt->pBaseAddress == swr_surface->pBaseAddress &&
856       rt->format == fmt &&
857       rt->lod == sf->u.tex.level &&
858       rt->arrayIndex == sf->u.tex.first_layer)
859      return false;
860
861   bool need_fence = false;
862
863   /* StoreTile for changed target */
864   if (rt->pBaseAddress) {
865      /* If changing attachment to a new target, mark tiles as
866       * INVALID so they are reloaded from surface. */
867      swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
868      need_fence = true;
869   }
870
871   /* Make new attachment */
872   *rt = *swr_surface;
873   rt->format = fmt;
874   rt->lod = sf->u.tex.level;
875   rt->arrayIndex = sf->u.tex.first_layer;
876
877   return need_fence;
878}
879
880static inline void
881swr_user_vbuf_range(const struct pipe_draw_info *info,
882                    const struct swr_vertex_element_state *velems,
883                    const struct pipe_vertex_buffer *vb,
884                    uint32_t i,
885                    uint32_t *totelems,
886                    uint32_t *base,
887                    uint32_t *size)
888{
889   /* FIXME: The size is too large - we don't access the full extra stride. */
890   unsigned elems;
891   if (velems->instanced_bufs & (1U << i)) {
892      elems = info->instance_count / velems->min_instance_div[i] + 1;
893      *totelems = info->start_instance + elems;
894      *base = info->start_instance * vb->stride;
895      *size = elems * vb->stride;
896   } else if (vb->stride) {
897      elems = info->max_index - info->min_index + 1;
898      *totelems = info->max_index + 1;
899      *base = info->min_index * vb->stride;
900      *size = elems * vb->stride;
901   } else {
902      *totelems = 1;
903      *base = 0;
904      *size = velems->stream_pitch[i];
905   }
906}
907
908void
909swr_update_derived(struct pipe_context *pipe,
910                   const struct pipe_draw_info *p_draw_info)
911{
912   struct swr_context *ctx = swr_context(pipe);
913   struct swr_screen *screen = swr_screen(pipe->screen);
914
915   /* Update screen->pipe to current pipe context. */
916   if (screen->pipe != pipe)
917      screen->pipe = pipe;
918
919   /* Any state that requires dirty flags to be re-triggered sets this mask */
920   /* For example, user_buffer vertex and index buffers. */
921   unsigned post_update_dirty_flags = 0;
922
923   /* Render Targets */
924   if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
925      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
926      const struct util_format_description *desc = NULL;
927      bool need_fence = false;
928
929      /* colorbuffer targets */
930      if (fb->nr_cbufs) {
931         for (unsigned i = 0; i < fb->nr_cbufs; ++i)
932            need_fence |= swr_change_rt(
933                  ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
934      }
935      for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
936         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
937
938      /* depth/stencil target */
939      if (fb->zsbuf)
940         desc = util_format_description(fb->zsbuf->format);
941      if (fb->zsbuf && util_format_has_depth(desc))
942         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
943      else
944         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
945
946      if (fb->zsbuf && util_format_has_stencil(desc))
947         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
948      else
949         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
950
951      /* This fence ensures any attachment changes are resolved before the
952       * next draw */
953      if (need_fence)
954         swr_fence_submit(ctx, screen->flush_fence);
955   }
956
957   /* Raster state */
958   if (ctx->dirty & (SWR_NEW_RASTERIZER |
959                     SWR_NEW_VS | // clipping
960                     SWR_NEW_FRAMEBUFFER)) {
961      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
962      pipe_framebuffer_state *fb = &ctx->framebuffer;
963
964      SWR_RASTSTATE *rastState = &ctx->derived.rastState;
965      rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
966      rastState->frontWinding = rasterizer->front_ccw
967         ? SWR_FRONTWINDING_CCW
968         : SWR_FRONTWINDING_CW;
969      rastState->scissorEnable = rasterizer->scissor;
970      rastState->pointSize = rasterizer->point_size > 0.0f
971         ? rasterizer->point_size
972         : 1.0f;
973      rastState->lineWidth = rasterizer->line_width > 0.0f
974         ? rasterizer->line_width
975         : 1.0f;
976
977      rastState->pointParam = rasterizer->point_size_per_vertex;
978
979      rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
980      rastState->pointSpriteTopOrigin =
981         rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
982
983      /* XXX TODO: Add multisample */
984      rastState->msaaRastEnable = false;
985      rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
986      rastState->sampleCount = SWR_MULTISAMPLE_1X;
987      rastState->forcedSampleCount = false;
988
989      bool do_offset = false;
990      switch (rasterizer->fill_front) {
991      case PIPE_POLYGON_MODE_FILL:
992         do_offset = rasterizer->offset_tri;
993         break;
994      case PIPE_POLYGON_MODE_LINE:
995         do_offset = rasterizer->offset_line;
996         break;
997      case PIPE_POLYGON_MODE_POINT:
998         do_offset = rasterizer->offset_point;
999         break;
1000      }
1001
1002      if (do_offset) {
1003         rastState->depthBias = rasterizer->offset_units;
1004         rastState->slopeScaledDepthBias = rasterizer->offset_scale;
1005         rastState->depthBiasClamp = rasterizer->offset_clamp;
1006      } else {
1007         rastState->depthBias = 0;
1008         rastState->slopeScaledDepthBias = 0;
1009         rastState->depthBiasClamp = 0;
1010      }
1011      struct pipe_surface *zb = fb->zsbuf;
1012      if (zb && swr_resource(zb->texture)->has_depth)
1013         rastState->depthFormat = swr_resource(zb->texture)->swr.format;
1014
1015      rastState->depthClipEnable = rasterizer->depth_clip;
1016      rastState->clipHalfZ = rasterizer->clip_halfz;
1017
1018      rastState->clipDistanceMask =
1019         ctx->vs->info.base.num_written_clipdistance ?
1020         ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
1021         rasterizer->clip_plane_enable;
1022
1023      rastState->cullDistanceMask =
1024         ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
1025
1026      SwrSetRastState(ctx->swrContext, rastState);
1027   }
1028
1029   /* Scissor */
1030   if (ctx->dirty & SWR_NEW_SCISSOR) {
1031      SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
1032   }
1033
1034   /* Viewport */
1035   if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
1036                     | SWR_NEW_RASTERIZER)) {
1037      pipe_viewport_state *state = &ctx->viewport;
1038      pipe_framebuffer_state *fb = &ctx->framebuffer;
1039      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1040
1041      SWR_VIEWPORT *vp = &ctx->derived.vp;
1042      SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
1043
1044      vp->x = state->translate[0] - state->scale[0];
1045      vp->width = 2 * state->scale[0];
1046      vp->y = state->translate[1] - fabs(state->scale[1]);
1047      vp->height = 2 * fabs(state->scale[1]);
1048      util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
1049                              &vp->minZ, &vp->maxZ);
1050
1051      vpm->m00[0] = state->scale[0];
1052      vpm->m11[0] = state->scale[1];
1053      vpm->m22[0] = state->scale[2];
1054      vpm->m30[0] = state->translate[0];
1055      vpm->m31[0] = state->translate[1];
1056      vpm->m32[0] = state->translate[2];
1057
1058      /* Now that the matrix is calculated, clip the view coords to screen
1059       * size.  OpenGL allows for -ve x,y in the viewport. */
1060      if (vp->x < 0.0f) {
1061         vp->width += vp->x;
1062         vp->x = 0.0f;
1063      }
1064      if (vp->y < 0.0f) {
1065         vp->height += vp->y;
1066         vp->y = 0.0f;
1067      }
1068      vp->width = std::min(vp->width, (float)fb->width - vp->x);
1069      vp->height = std::min(vp->height, (float)fb->height - vp->y);
1070
1071      SwrSetViewports(ctx->swrContext, 1, vp, vpm);
1072   }
1073
1074   /* Set vertex & index buffers */
1075   /* (using draw info if called by swr_draw_vbo) */
1076   if (ctx->dirty & SWR_NEW_VERTEX) {
1077      uint32_t scratch_total;
1078      uint8_t *scratch = NULL;
1079
1080      /* If being called by swr_draw_vbo, copy draw details */
1081      struct pipe_draw_info info = {0};
1082      if (p_draw_info)
1083         info = *p_draw_info;
1084
1085      /* We must get all the scratch space in one go */
1086      scratch_total = 0;
1087      for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1088         struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1089
1090         if (!vb->user_buffer)
1091            continue;
1092
1093         uint32_t elems, base, size;
1094         swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1095         scratch_total += AlignUp(size, 4);
1096      }
1097
1098      if (scratch_total) {
1099         scratch = (uint8_t *)swr_copy_to_scratch_space(
1100               ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
1101      }
1102
1103      /* vertex buffers */
1104      SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
1105      for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1106         uint32_t size, pitch, elems, partial_inbounds;
1107         const uint8_t *p_data;
1108         struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1109
1110         pitch = vb->stride;
1111         if (!vb->user_buffer) {
1112            /* VBO
1113             * size is based on buffer->width0 rather than info.max_index
1114             * to prevent having to validate VBO on each draw */
1115            size = vb->buffer->width0;
1116            elems = size / pitch;
1117            partial_inbounds = size % pitch;
1118
1119            p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
1120         } else {
1121            /* Client buffer
1122             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1123             * revalidate on each draw */
1124            post_update_dirty_flags |= SWR_NEW_VERTEX;
1125
1126            uint32_t base;
1127            swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1128            partial_inbounds = 0;
1129
1130            /* Copy only needed vertices to scratch space */
1131            size = AlignUp(size, 4);
1132            const void *ptr = (const uint8_t *) vb->user_buffer + base;
1133            memcpy(scratch, ptr, size);
1134            ptr = scratch;
1135            scratch += size;
1136            p_data = (const uint8_t *)ptr - base;
1137         }
1138
1139         swrVertexBuffers[i] = {0};
1140         swrVertexBuffers[i].index = i;
1141         swrVertexBuffers[i].pitch = pitch;
1142         swrVertexBuffers[i].pData = p_data;
1143         swrVertexBuffers[i].size = size;
1144         swrVertexBuffers[i].maxVertex = elems;
1145         swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
1146      }
1147
1148      SwrSetVertexBuffers(
1149         ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
1150
1151      /* index buffer, if required (info passed in by swr_draw_vbo) */
1152      SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
1153      if (info.indexed) {
1154         const uint8_t *p_data;
1155         uint32_t size, pitch;
1156         struct pipe_index_buffer *ib = &ctx->index_buffer;
1157
1158         pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
1159         index_type = swr_convert_index_type(pitch);
1160
1161         if (!ib->user_buffer) {
1162            /* VBO
1163             * size is based on buffer->width0 rather than info.count
1164             * to prevent having to validate VBO on each draw */
1165            size = ib->buffer->width0;
1166            p_data = swr_resource_data(ib->buffer) + ib->offset;
1167         } else {
1168            /* Client buffer
1169             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1170             * revalidate on each draw */
1171            post_update_dirty_flags |= SWR_NEW_VERTEX;
1172
1173            size = info.count * pitch;
1174            size = AlignUp(size, 4);
1175
1176            /* Copy indices to scratch space */
1177            const void *ptr = ib->user_buffer;
1178            ptr = swr_copy_to_scratch_space(
1179               ctx, &ctx->scratch->index_buffer, ptr, size);
1180            p_data = (const uint8_t *)ptr;
1181         }
1182
1183         SWR_INDEX_BUFFER_STATE swrIndexBuffer;
1184         swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
1185         swrIndexBuffer.pIndices = p_data;
1186         swrIndexBuffer.size = size;
1187
1188         SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
1189      }
1190
1191      struct swr_vertex_element_state *velems = ctx->velems;
1192      if (velems && velems->fsState.indexType != index_type) {
1193         velems->fsFunc = NULL;
1194         velems->fsState.indexType = index_type;
1195      }
1196   }
1197
1198   /* VertexShader */
1199   if (ctx->dirty & (SWR_NEW_VS |
1200                     SWR_NEW_RASTERIZER | // for clip planes
1201                     SWR_NEW_SAMPLER |
1202                     SWR_NEW_SAMPLER_VIEW |
1203                     SWR_NEW_FRAMEBUFFER)) {
1204      swr_jit_vs_key key;
1205      swr_generate_vs_key(key, ctx, ctx->vs);
1206      auto search = ctx->vs->map.find(key);
1207      PFN_VERTEX_FUNC func;
1208      if (search != ctx->vs->map.end()) {
1209         func = search->second->shader;
1210      } else {
1211         func = swr_compile_vs(ctx, key);
1212      }
1213      SwrSetVertexFunc(ctx->swrContext, func);
1214
1215      /* JIT sampler state */
1216      if (ctx->dirty & SWR_NEW_SAMPLER) {
1217         swr_update_sampler_state(ctx,
1218                                  PIPE_SHADER_VERTEX,
1219                                  key.nr_samplers,
1220                                  ctx->swrDC.samplersVS);
1221      }
1222
1223      /* JIT sampler view state */
1224      if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1225         swr_update_texture_state(ctx,
1226                                  PIPE_SHADER_VERTEX,
1227                                  key.nr_sampler_views,
1228                                  ctx->swrDC.texturesVS);
1229      }
1230   }
1231
1232   /* FragmentShader */
1233   if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
1234                     | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
1235      swr_jit_fs_key key;
1236      swr_generate_fs_key(key, ctx, ctx->fs);
1237      auto search = ctx->fs->map.find(key);
1238      PFN_PIXEL_KERNEL func;
1239      if (search != ctx->fs->map.end()) {
1240         func = search->second->shader;
1241      } else {
1242         func = swr_compile_fs(ctx, key);
1243      }
1244      SWR_PS_STATE psState = {0};
1245      psState.pfnPixelShader = func;
1246      psState.killsPixel = ctx->fs->info.base.uses_kill;
1247      psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
1248      psState.writesODepth = ctx->fs->info.base.writes_z;
1249      psState.usesSourceDepth = ctx->fs->info.base.reads_z;
1250      psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
1251      psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
1252      psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
1253      uint32_t barycentricsMask = 0;
1254#if 0
1255      // when we switch to mesa-master
1256      if (ctx->fs->info.base.uses_persp_center ||
1257          ctx->fs->info.base.uses_linear_center)
1258         barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1259      if (ctx->fs->info.base.uses_persp_centroid ||
1260          ctx->fs->info.base.uses_linear_centroid)
1261         barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1262      if (ctx->fs->info.base.uses_persp_sample ||
1263          ctx->fs->info.base.uses_linear_sample)
1264         barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1265#else
1266      for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
1267         switch (ctx->fs->info.base.input_interpolate_loc[i]) {
1268         case TGSI_INTERPOLATE_LOC_CENTER:
1269            barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1270            break;
1271         case TGSI_INTERPOLATE_LOC_CENTROID:
1272            barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1273            break;
1274         case TGSI_INTERPOLATE_LOC_SAMPLE:
1275            barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1276            break;
1277         }
1278      }
1279#endif
1280      psState.barycentricsMask = barycentricsMask;
1281      psState.usesUAV = false; // XXX
1282      psState.forceEarlyZ = false;
1283      SwrSetPixelShaderState(ctx->swrContext, &psState);
1284
1285      /* JIT sampler state */
1286      if (ctx->dirty & SWR_NEW_SAMPLER) {
1287         swr_update_sampler_state(ctx,
1288                                  PIPE_SHADER_FRAGMENT,
1289                                  key.nr_samplers,
1290                                  ctx->swrDC.samplersFS);
1291      }
1292
1293      /* JIT sampler view state */
1294      if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1295         swr_update_texture_state(ctx,
1296                                  PIPE_SHADER_FRAGMENT,
1297                                  key.nr_sampler_views,
1298                                  ctx->swrDC.texturesFS);
1299      }
1300   }
1301
1302
1303   /* VertexShader Constants */
1304   if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
1305      swr_update_constants(ctx, PIPE_SHADER_VERTEX);
1306   }
1307
1308   /* FragmentShader Constants */
1309   if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
1310      swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
1311   }
1312
1313   /* Depth/stencil state */
1314   if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
1315      struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
1316      struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
1317      SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
1318      SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
1319
1320      /* XXX, incomplete.  Need to flesh out stencil & alpha test state
1321      struct pipe_stencil_state *front_stencil =
1322      ctx->depth_stencil.stencil[0];
1323      struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
1324      struct pipe_alpha_state alpha;
1325      */
1326      if (stencil[0].enabled) {
1327         depthStencilState.stencilWriteEnable = 1;
1328         depthStencilState.stencilTestEnable = 1;
1329         depthStencilState.stencilTestFunc =
1330            swr_convert_depth_func(stencil[0].func);
1331
1332         depthStencilState.stencilPassDepthPassOp =
1333            swr_convert_stencil_op(stencil[0].zpass_op);
1334         depthStencilState.stencilPassDepthFailOp =
1335            swr_convert_stencil_op(stencil[0].zfail_op);
1336         depthStencilState.stencilFailOp =
1337            swr_convert_stencil_op(stencil[0].fail_op);
1338         depthStencilState.stencilWriteMask = stencil[0].writemask;
1339         depthStencilState.stencilTestMask = stencil[0].valuemask;
1340         depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
1341      }
1342      if (stencil[1].enabled) {
1343         depthStencilState.doubleSidedStencilTestEnable = 1;
1344
1345         depthStencilState.backfaceStencilTestFunc =
1346            swr_convert_depth_func(stencil[1].func);
1347
1348         depthStencilState.backfaceStencilPassDepthPassOp =
1349            swr_convert_stencil_op(stencil[1].zpass_op);
1350         depthStencilState.backfaceStencilPassDepthFailOp =
1351            swr_convert_stencil_op(stencil[1].zfail_op);
1352         depthStencilState.backfaceStencilFailOp =
1353            swr_convert_stencil_op(stencil[1].fail_op);
1354         depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
1355         depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
1356
1357         depthStencilState.backfaceStencilRefValue =
1358            ctx->stencil_ref.ref_value[1];
1359      }
1360
1361      depthStencilState.depthTestEnable = depth->enabled;
1362      depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
1363      depthStencilState.depthWriteEnable = depth->writemask;
1364      SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
1365
1366      depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
1367      depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
1368      depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
1369      SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
1370   }
1371
1372   /* Blend State */
1373   if (ctx->dirty & (SWR_NEW_BLEND |
1374                     SWR_NEW_FRAMEBUFFER |
1375                     SWR_NEW_DEPTH_STENCIL_ALPHA)) {
1376      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1377
1378      SWR_BLEND_STATE blendState;
1379      memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
1380      blendState.constantColor[0] = ctx->blend_color.color[0];
1381      blendState.constantColor[1] = ctx->blend_color.color[1];
1382      blendState.constantColor[2] = ctx->blend_color.color[2];
1383      blendState.constantColor[3] = ctx->blend_color.color[3];
1384      blendState.alphaTestReference =
1385         *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
1386
1387      // XXX MSAA
1388      blendState.sampleMask = 0;
1389      blendState.sampleCount = SWR_MULTISAMPLE_1X;
1390
1391      /* If there are no color buffers bound, disable writes on RT0
1392       * and skip loop */
1393      if (fb->nr_cbufs == 0) {
1394         blendState.renderTarget[0].writeDisableRed = 1;
1395         blendState.renderTarget[0].writeDisableGreen = 1;
1396         blendState.renderTarget[0].writeDisableBlue = 1;
1397         blendState.renderTarget[0].writeDisableAlpha = 1;
1398         SwrSetBlendFunc(ctx->swrContext, 0, NULL);
1399      }
1400      else
1401         for (int target = 0;
1402               target < std::min(SWR_NUM_RENDERTARGETS,
1403                                 PIPE_MAX_COLOR_BUFS);
1404               target++) {
1405            if (!fb->cbufs[target])
1406               continue;
1407
1408            struct swr_resource *colorBuffer =
1409               swr_resource(fb->cbufs[target]->texture);
1410
1411            BLEND_COMPILE_STATE compileState;
1412            memset(&compileState, 0, sizeof(compileState));
1413            compileState.format = colorBuffer->swr.format;
1414            memcpy(&compileState.blendState,
1415                   &ctx->blend->compileState[target],
1416                   sizeof(compileState.blendState));
1417
1418            const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
1419            if (compileState.blendState.logicOpEnable &&
1420                ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
1421               compileState.blendState.logicOpEnable = false;
1422            }
1423
1424            if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
1425               compileState.blendState.blendEnable = false;
1426
1427            if (compileState.blendState.blendEnable == false &&
1428                compileState.blendState.logicOpEnable == false &&
1429                ctx->depth_stencil->alpha.enabled == 0) {
1430               SwrSetBlendFunc(ctx->swrContext, target, NULL);
1431               continue;
1432            }
1433
1434            compileState.desc.alphaTestEnable =
1435               ctx->depth_stencil->alpha.enabled;
1436            compileState.desc.independentAlphaBlendEnable =
1437               (compileState.blendState.sourceBlendFactor !=
1438                compileState.blendState.sourceAlphaBlendFactor) ||
1439               (compileState.blendState.destBlendFactor !=
1440                compileState.blendState.destAlphaBlendFactor) ||
1441               (compileState.blendState.colorBlendFunc !=
1442                compileState.blendState.alphaBlendFunc);
1443            compileState.desc.alphaToCoverageEnable =
1444               ctx->blend->pipe.alpha_to_coverage;
1445            compileState.desc.sampleMaskEnable = 0; // XXX
1446            compileState.desc.numSamples = 1; // XXX
1447
1448            compileState.alphaTestFunction =
1449               swr_convert_depth_func(ctx->depth_stencil->alpha.func);
1450            compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
1451
1452            compileState.Canonicalize();
1453
1454            PFN_BLEND_JIT_FUNC func = NULL;
1455            auto search = ctx->blendJIT->find(compileState);
1456            if (search != ctx->blendJIT->end()) {
1457               func = search->second;
1458            } else {
1459               HANDLE hJitMgr = screen->hJitMgr;
1460               func = JitCompileBlend(hJitMgr, compileState);
1461               debug_printf("BLEND shader %p\n", func);
1462               assert(func && "Error: BlendShader = NULL");
1463
1464               ctx->blendJIT->insert(std::make_pair(compileState, func));
1465            }
1466            SwrSetBlendFunc(ctx->swrContext, target, func);
1467         }
1468
1469      SwrSetBlendState(ctx->swrContext, &blendState);
1470   }
1471
1472   if (ctx->dirty & SWR_NEW_STIPPLE) {
1473      /* XXX What to do with this one??? SWR doesn't stipple */
1474   }
1475
1476   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
1477      ctx->vs->soState.rasterizerDisable =
1478         ctx->rasterizer->rasterizer_discard;
1479      SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
1480
1481      pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
1482
1483      for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
1484         SWR_STREAMOUT_BUFFER buffer = {0};
1485         if (!ctx->so_targets[i])
1486            continue;
1487         buffer.enable = true;
1488         buffer.pBuffer =
1489            (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
1490                         ctx->so_targets[i]->buffer_offset);
1491         buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
1492         buffer.pitch = stream_output->stride[i];
1493         buffer.streamOffset = 0;
1494
1495         SwrSetSoBuffers(ctx->swrContext, &buffer, i);
1496      }
1497   }
1498
1499   if (ctx->dirty & SWR_NEW_CLIP) {
1500      // shader exporting clip distances overrides all user clip planes
1501      if (ctx->rasterizer->clip_plane_enable &&
1502          !ctx->vs->info.base.num_written_clipdistance)
1503      {
1504         swr_draw_context *pDC = &ctx->swrDC;
1505         memcpy(pDC->userClipPlanes,
1506                ctx->clip.ucp,
1507                sizeof(pDC->userClipPlanes));
1508      }
1509   }
1510
1511   // set up backend state
1512   SWR_BACKEND_STATE backendState = {0};
1513   backendState.numAttributes =
1514      ctx->vs->info.base.num_outputs - 1 +
1515      (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
1516   for (unsigned i = 0; i < backendState.numAttributes; i++)
1517      backendState.numComponents[i] = 4;
1518   backendState.constantInterpolationMask = ctx->fs->constantMask |
1519      (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
1520   backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
1521
1522   SwrSetBackendState(ctx->swrContext, &backendState);
1523
1524   /* Ensure that any in-progress attachment change StoreTiles finish */
1525   if (swr_is_fence_pending(screen->flush_fence))
1526      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
1527
1528   /* Finally, update the in-use status of all resources involved in draw */
1529   swr_update_resource_status(pipe, p_draw_info);
1530
1531   ctx->dirty = post_update_dirty_flags;
1532}
1533
1534
1535static struct pipe_stream_output_target *
1536swr_create_so_target(struct pipe_context *pipe,
1537                     struct pipe_resource *buffer,
1538                     unsigned buffer_offset,
1539                     unsigned buffer_size)
1540{
1541   struct pipe_stream_output_target *target;
1542
1543   target = CALLOC_STRUCT(pipe_stream_output_target);
1544   if (!target)
1545      return NULL;
1546
1547   target->context = pipe;
1548   target->reference.count = 1;
1549   pipe_resource_reference(&target->buffer, buffer);
1550   target->buffer_offset = buffer_offset;
1551   target->buffer_size = buffer_size;
1552   return target;
1553}
1554
1555static void
1556swr_destroy_so_target(struct pipe_context *pipe,
1557                      struct pipe_stream_output_target *target)
1558{
1559   pipe_resource_reference(&target->buffer, NULL);
1560   FREE(target);
1561}
1562
1563static void
1564swr_set_so_targets(struct pipe_context *pipe,
1565                   unsigned num_targets,
1566                   struct pipe_stream_output_target **targets,
1567                   const unsigned *offsets)
1568{
1569   struct swr_context *swr = swr_context(pipe);
1570   uint32_t i;
1571
1572   assert(num_targets <= MAX_SO_STREAMS);
1573
1574   for (i = 0; i < num_targets; i++) {
1575      pipe_so_target_reference(
1576         (struct pipe_stream_output_target **)&swr->so_targets[i],
1577         targets[i]);
1578   }
1579
1580   for (/* fall-through */; i < swr->num_so_targets; i++) {
1581      pipe_so_target_reference(
1582         (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
1583   }
1584
1585   swr->num_so_targets = num_targets;
1586
1587   swr->dirty |= SWR_NEW_SO;
1588}
1589
1590
1591void
1592swr_state_init(struct pipe_context *pipe)
1593{
1594   pipe->create_blend_state = swr_create_blend_state;
1595   pipe->bind_blend_state = swr_bind_blend_state;
1596   pipe->delete_blend_state = swr_delete_blend_state;
1597
1598   pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
1599   pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
1600   pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
1601
1602   pipe->create_rasterizer_state = swr_create_rasterizer_state;
1603   pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
1604   pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
1605
1606   pipe->create_sampler_state = swr_create_sampler_state;
1607   pipe->bind_sampler_states = swr_bind_sampler_states;
1608   pipe->delete_sampler_state = swr_delete_sampler_state;
1609
1610   pipe->create_sampler_view = swr_create_sampler_view;
1611   pipe->set_sampler_views = swr_set_sampler_views;
1612   pipe->sampler_view_destroy = swr_sampler_view_destroy;
1613
1614   pipe->create_vs_state = swr_create_vs_state;
1615   pipe->bind_vs_state = swr_bind_vs_state;
1616   pipe->delete_vs_state = swr_delete_vs_state;
1617
1618   pipe->create_fs_state = swr_create_fs_state;
1619   pipe->bind_fs_state = swr_bind_fs_state;
1620   pipe->delete_fs_state = swr_delete_fs_state;
1621
1622   pipe->set_constant_buffer = swr_set_constant_buffer;
1623
1624   pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
1625   pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
1626   pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
1627
1628   pipe->set_vertex_buffers = swr_set_vertex_buffers;
1629   pipe->set_index_buffer = swr_set_index_buffer;
1630
1631   pipe->set_polygon_stipple = swr_set_polygon_stipple;
1632   pipe->set_clip_state = swr_set_clip_state;
1633   pipe->set_scissor_states = swr_set_scissor_states;
1634   pipe->set_viewport_states = swr_set_viewport_states;
1635
1636   pipe->set_framebuffer_state = swr_set_framebuffer_state;
1637
1638   pipe->set_blend_color = swr_set_blend_color;
1639   pipe->set_stencil_ref = swr_set_stencil_ref;
1640
1641   pipe->set_sample_mask = swr_set_sample_mask;
1642
1643   pipe->create_stream_output_target = swr_create_so_target;
1644   pipe->stream_output_target_destroy = swr_destroy_so_target;
1645   pipe->set_stream_output_targets = swr_set_so_targets;
1646}
1647