vl_zscan.c revision 6ad846ee78d9d8ba93dcecdefbf89f2b981333ef
1/**************************************************************************
2 *
3 * Copyright 2011 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include <assert.h>
29
30#include <pipe/p_screen.h>
31#include <pipe/p_context.h>
32
33#include <util/u_draw.h>
34#include <util/u_sampler.h>
35#include <util/u_inlines.h>
36
37#include <tgsi/tgsi_ureg.h>
38
39#include <vl/vl_defines.h>
40#include <vl/vl_types.h>
41
42#include "vl_zscan.h"
43#include "vl_vertex_buffers.h"
44
/*
 * Semantic indices used for the vertex shader outputs in this file:
 * VS_O_VPOS is the index given to the POSITION output, and
 * VS_O_VTEX + i is the index given to the GENERIC output of channel i.
 */
enum VS_OUTPUT
{
   VS_O_VPOS = 0,
   VS_O_VTEX = 1
};
50
/* Linear scan pattern: 64 entries, entry i holds the value i. */
const int vl_zscan_linear[] =
{
    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63
};
63
/*
 * Zig-zag scan pattern: 64 entries forming a permutation of 0..63,
 * written here as eight rows of eight.
 */
const int vl_zscan_normal[] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63
};
76
/*
 * Alternate scan pattern: 64 entries forming a permutation of 0..63,
 * written here as eight rows of eight.
 */
const int vl_zscan_alternate[] =
{
    0,  8, 16, 24,  1,  9,  2, 10,
   17, 25, 32, 40, 48, 56, 57, 49,
   41, 33, 26, 18,  3, 11,  4, 12,
   19, 27, 34, 42, 50, 58, 35, 43,
   51, 59, 20, 28,  5, 13,  6, 14,
   21, 29, 36, 44, 52, 60, 37, 45,
   53, 61, 22, 30,  7, 15, 23, 31,
   38, 46, 54, 62, 39, 47, 55, 63
};
89
90static void *
91create_vert_shader(struct vl_zscan *zscan)
92{
93   struct ureg_program *shader;
94
95   struct ureg_src scale, instance;
96   struct ureg_src vrect, vpos;
97
98   struct ureg_dst tmp;
99   struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
100
101   unsigned i;
102
103   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
104   if (!shader)
105      return NULL;
106
107   scale = ureg_imm2f(shader,
108      (float)BLOCK_WIDTH / zscan->buffer_width,
109      (float)BLOCK_HEIGHT / zscan->buffer_height);
110
111   instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
112
113   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
114   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
115
116   tmp = ureg_DECL_temporary(shader);
117
118   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
119
120   for (i = 0; i < zscan->num_channels; ++i)
121      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
122
123   /*
124    * o_vpos.xy = (vpos + vrect) * scale
125    * o_vpos.zw = 1.0f
126    *
127    * tmp.xy = InstanceID / blocks_per_line
128    * tmp.x = frac(tmp.x)
129    * tmp.y = floor(tmp.y)
130    *
131    * o_vtex.x = vrect.x / blocks_per_line + tmp.x
132    * o_vtex.y = vrect.y
133    * o_vtex.z = tmp.z * blocks_per_line / blocks_total
134    */
135   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
136   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
137   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
138
139   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance,
140            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
141
142   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp));
143   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
144
145   for (i = 0; i < zscan->num_channels; ++i) {
146      if (i > 0)
147         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp),
148                  ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH)));
149
150      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
151               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
152      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
153      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
154               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
155   }
156
157   ureg_release_temporary(shader, tmp);
158   ureg_END(shader);
159
160   return ureg_create_shader_and_destroy(shader, zscan->pipe);
161}
162
/**
 * Build the fragment shader for the zscan pass.
 *
 * Declares one linearly interpolated GENERIC input per channel (indices
 * VS_O_VTEX + i), three samplers (0 = src, 1 = scan, 2 = quant) and a
 * single COLOR output. The quant sampler is declared but not sampled yet
 * (see the TODO below).
 *
 * \param zscan  zscan object; reads pipe and num_channels.
 * \return the value of ureg_create_shader_and_destroy(), or NULL when
 *         ureg_create() fails.
 */
static void *
create_frag_shader(struct vl_zscan *zscan)
{
   struct ureg_program *shader;
   struct ureg_src vtex[zscan->num_channels];

   struct ureg_src src, scan, quant;

   struct ureg_dst tmp[zscan->num_channels];
   struct ureg_dst fragment;

   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   for (i = 0; i < zscan->num_channels; ++i)
      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);

   src = ureg_DECL_sampler(shader, 0);
   scan = ureg_DECL_sampler(shader, 1);
   quant = ureg_DECL_sampler(shader, 2);

   for (i = 0; i < zscan->num_channels; ++i)
      tmp[i] = ureg_DECL_temporary(shader);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * tmp.x = tex(vtex, 1)
    * tmp.y = vtex.z
    * fragment = tex(tmp, 0) * quant
    */
   /* first lookup: sample the scan texture at vtex[i], keep it in tmp[i].x */
   for (i = 0; i < zscan->num_channels; ++i)
      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);

   /* the second coordinate is taken unchanged from vtex[i].z */
   for (i = 0; i < zscan->num_channels; ++i)
      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));

   /* second lookup: sample the source texture at tmp[i].xy, overwriting tmp[i] */
   for (i = 0; i < zscan->num_channels; ++i)
      ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);

   // TODO: Fetch quant and use it
   /*
    * Channel i is written to the output component selected by
    * TGSI_WRITEMASK_X << i; the multiplication by 1.0f stands in for the
    * quantizer value.
    * NOTE(review): the shift presumably requires num_channels <= 4 - confirm
    * against callers.
    */
   for (i = 0; i < zscan->num_channels; ++i)
      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < zscan->num_channels; ++i)
      ureg_release_temporary(shader, tmp[i]);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
216
217static bool
218init_shaders(struct vl_zscan *zscan)
219{
220   assert(zscan);
221
222   zscan->vs = create_vert_shader(zscan);
223   if (!zscan->vs)
224      goto error_vs;
225
226   zscan->fs = create_frag_shader(zscan);
227   if (!zscan->fs)
228      goto error_fs;
229
230   return true;
231
232error_fs:
233   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
234
235error_vs:
236   return false;
237}
238
239static void
240cleanup_shaders(struct vl_zscan *zscan)
241{
242   assert(zscan);
243
244   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
245   zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
246}
247
248static bool
249init_state(struct vl_zscan *zscan)
250{
251   struct pipe_blend_state blend;
252   struct pipe_rasterizer_state rs_state;
253   struct pipe_sampler_state sampler;
254   unsigned i;
255
256   assert(zscan);
257
258   memset(&rs_state, 0, sizeof(rs_state));
259   rs_state.gl_rasterization_rules = false;
260   zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
261   if (!zscan->rs_state)
262      goto error_rs_state;
263
264   memset(&blend, 0, sizeof blend);
265
266   blend.independent_blend_enable = 0;
267   blend.rt[0].blend_enable = 0;
268   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
269   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
270   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
271   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
272   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
273   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
274   blend.logicop_enable = 0;
275   blend.logicop_func = PIPE_LOGICOP_CLEAR;
276   /* Needed to allow color writes to FB, even if blending disabled */
277   blend.rt[0].colormask = PIPE_MASK_RGBA;
278   blend.dither = 0;
279   zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
280   if (!zscan->blend)
281      goto error_blend;
282
283   for (i = 0; i < 3; ++i) {
284      memset(&sampler, 0, sizeof(sampler));
285      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
286      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
287      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
288      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
289      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
290      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
291      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
292      sampler.compare_func = PIPE_FUNC_ALWAYS;
293      sampler.normalized_coords = 1;
294      zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
295      if (!zscan->samplers[i])
296         goto error_samplers;
297   }
298
299   return true;
300
301error_samplers:
302   for (i = 0; i < 2; ++i)
303      if (zscan->samplers[i])
304         zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
305
306   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
307
308error_blend:
309   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
310
311error_rs_state:
312   return false;
313}
314
315static void
316cleanup_state(struct vl_zscan *zscan)
317{
318   unsigned i;
319
320   assert(zscan);
321
322   for (i = 0; i < 3; ++i)
323      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
324
325   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
326   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
327}
328
329struct pipe_sampler_view *
330vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line)
331{
332   const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
333
334   int patched_layout[64];
335
336   struct pipe_resource res_tmpl, *res;
337   struct pipe_sampler_view sv_tmpl, *sv;
338   struct pipe_transfer *buf_transfer;
339   unsigned x, y, i, pitch;
340   float *f;
341
342   struct pipe_box rect =
343   {
344      0, 0, 0,
345      BLOCK_WIDTH * blocks_per_line,
346      BLOCK_HEIGHT,
347      1
348   };
349
350   assert(pipe && layout && blocks_per_line);
351
352   for (i = 0; i < 64; ++i)
353      patched_layout[layout[i]] = i;
354
355   memset(&res_tmpl, 0, sizeof(res_tmpl));
356   res_tmpl.target = PIPE_TEXTURE_2D;
357   res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
358   res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line;
359   res_tmpl.height0 = BLOCK_HEIGHT;
360   res_tmpl.depth0 = 1;
361   res_tmpl.array_size = 1;
362   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
363   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
364
365   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
366   if (!res)
367      goto error_resource;
368
369   buf_transfer = pipe->get_transfer
370   (
371      pipe, res,
372      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
373      &rect
374   );
375   if (!buf_transfer)
376      goto error_transfer;
377
378   pitch = buf_transfer->stride / sizeof(float);
379
380   f = pipe->transfer_map(pipe, buf_transfer);
381   if (!f)
382      goto error_map;
383
384   for (i = 0; i < blocks_per_line; ++i)
385      for (y = 0; y < BLOCK_HEIGHT; ++y)
386         for (x = 0; x < BLOCK_WIDTH; ++x) {
387            float addr = patched_layout[x + y * BLOCK_WIDTH] +
388               i * BLOCK_WIDTH * BLOCK_HEIGHT;
389
390            addr /= total_size;
391
392            f[i * BLOCK_WIDTH + y * pitch + x] = addr;
393         }
394
395   pipe->transfer_unmap(pipe, buf_transfer);
396   pipe->transfer_destroy(pipe, buf_transfer);
397
398   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
399   u_sampler_view_default_template(&sv_tmpl, res, res->format);
400   sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
401   pipe_resource_reference(&res, NULL);
402   if (!sv)
403      goto error_map;
404
405   return sv;
406
407error_map:
408   pipe->transfer_destroy(pipe, buf_transfer);
409
410error_transfer:
411   pipe_resource_reference(&res, NULL);
412
413error_resource:
414   return NULL;
415}
416
417#if 0
418// TODO
419struct pipe_sampler_view *
420vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
421
422struct pipe_sampler_view *
423vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
424#endif
425
426bool
427vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
428              unsigned buffer_width, unsigned buffer_height,
429              unsigned blocks_per_line, unsigned blocks_total,
430              unsigned num_channels)
431{
432   assert(zscan && pipe);
433
434   zscan->pipe = pipe;
435   zscan->buffer_width = buffer_width;
436   zscan->buffer_height = buffer_height;
437   zscan->num_channels = num_channels;
438   zscan->blocks_per_line = blocks_per_line;
439   zscan->blocks_total = blocks_total;
440
441   if(!init_shaders(zscan))
442      return false;
443
444   if(!init_state(zscan)) {
445      cleanup_shaders(zscan);
446      return false;
447   }
448
449   return true;
450}
451
/**
 * Release everything vl_zscan_init() created: first the shaders, then the
 * sampler, rasterizer and blend state objects.
 */
void
vl_zscan_cleanup(struct vl_zscan *zscan)
{
   assert(zscan);

   cleanup_shaders(zscan);
   cleanup_state(zscan);
}
460
461#if 0
462// TODO
463void
464vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
465#endif
466
467bool
468vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
469                     struct pipe_sampler_view *src, struct pipe_surface *dst)
470{
471   assert(zscan && buffer);
472
473   memset(buffer, 0, sizeof(struct vl_zscan_buffer));
474
475   buffer->zscan = zscan;
476
477   pipe_sampler_view_reference(&buffer->src, src);
478
479   buffer->viewport.scale[0] = dst->width;
480   buffer->viewport.scale[1] = dst->height;
481   buffer->viewport.scale[2] = 1;
482   buffer->viewport.scale[3] = 1;
483   buffer->viewport.translate[0] = 0;
484   buffer->viewport.translate[1] = 0;
485   buffer->viewport.translate[2] = 0;
486   buffer->viewport.translate[3] = 0;
487
488   buffer->fb_state.width = dst->width;
489   buffer->fb_state.height = dst->height;
490   buffer->fb_state.nr_cbufs = 1;
491   pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
492
493   return true;
494}
495
496void
497vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
498{
499   assert(buffer);
500
501   pipe_sampler_view_reference(&buffer->src, NULL);
502   pipe_sampler_view_reference(&buffer->layout, NULL);
503   pipe_sampler_view_reference(&buffer->quant, NULL);
504   pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
505}
506
/**
 * Make buffer->layout refer to the given layout sampler view, using
 * pipe_sampler_view_reference(). Both arguments must be non-NULL.
 */
void
vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout)
{
   assert(buffer);
   assert(layout);

   pipe_sampler_view_reference(&buffer->layout, layout);
}
515
516void
517vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
518{
519   struct vl_zscan *zscan;
520
521   assert(buffer);
522
523   zscan = buffer->zscan;
524
525   zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
526   zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
527   zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
528   zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
529   zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
530   zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
531   zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
532   zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
533   util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
534}
535