vl_idct.c revision 336c7735ae97ddc0a177562375136297c2de3d19
1/**************************************************************************
2 *
3 * Copyright 2010 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "vl_idct.h"
29#include "util/u_draw.h"
30#include <assert.h>
31#include <pipe/p_context.h>
32#include <pipe/p_screen.h>
33#include <util/u_inlines.h>
34#include <util/u_sampler.h>
35#include <util/u_format.h>
36#include <tgsi/tgsi_ureg.h>
37#include "vl_types.h"
38
/* dimensions of one IDCT block; used below both for block positions and
 * for the size of the coefficient matrix */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/* 32768 / 256 = 128.
 * NOTE(review): presumably rescales normalized 16 bit texel values to the
 * 9 bit range of the IDCT results -- confirm */
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

/* the total scale is split over the two passes:
 * STAGE1_SCALE is applied in the matrix fragment shader, STAGE2_SCALE in the
 * transpose fragment shader, and their product is SCALE_FACTOR_16_TO_9 */
#define STAGE1_SCALE 4.0f
#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
46
/* NOTE(review): not referenced anywhere in the visible part of this file;
 * confirm whether it is still needed */
struct vertex_shader_consts
{
   struct vertex4f norm;
};
51
/* vertex shader input slots, passed to ureg_DECL_vs_input() */
enum VS_INPUT
{
   VS_I_RECT, /* slot 0: the "Rect element" set up in init_buffers() */
   VS_I_VPOS, /* slot 1: the "Pos element" set up in init_buffers() */

   NUM_VS_INPUTS
};
59
/*
 * Semantic indices shared between the vertex shader outputs and the
 * fragment shader inputs. VS_O_VPOS is used with TGSI_SEMANTIC_POSITION,
 * the others with TGSI_SEMANTIC_GENERIC.
 */
enum VS_OUTPUT
{
   VS_O_VPOS,
   VS_O_BLOCK,
   VS_O_TEX,
   VS_O_START
};
67
/*
 * 8x8 transform coefficients. Row k, column n matches
 * c(k) * cos((2n + 1) * k * PI / 16) / 2 with c(0) = 1/sqrt(2) and c(k) = 1
 * otherwise (e.g. 0.353553 = 1 / (2 * sqrt(2)), 0.490393 = cos(PI / 16) / 2).
 * vl_idct_upload_matrix() uploads the transpose of this table.
 */
static const float const_matrix[8][8] = {
   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,  0.4619400f },
   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f, -0.4157350f },
   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,  0.3535530f },
   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f, -0.2777850f },
   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,  0.1913420f },
   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
};
78
/* vertices for a quad covering a block; init_constants() replicates these
 * four corners once per block into the quad vertex buffer */
static const struct vertex2f const_quad[4] = {
   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
};
83
84static void *
85create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
86{
87   struct ureg_program *shader;
88   struct ureg_src scale;
89   struct ureg_src vrect, vpos;
90   struct ureg_dst t_vpos;
91   struct ureg_dst o_vpos, o_block, o_tex, o_start;
92
93   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
94   if (!shader)
95      return NULL;
96
97   t_vpos = ureg_DECL_temporary(shader);
98
99   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
100   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
101
102   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
103
104   /*
105    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
106    *
107    * t_vpos = vpos + vrect
108    * o_vpos.xy = t_vpos * scale
109    * o_vpos.zw = vpos
110    *
111    * o_block = vrect
112    * o_tex = t_pos
113    * o_start = vpos * scale
114    *
115    */
116   scale = ureg_imm2f(shader,
117      (float)BLOCK_WIDTH / idct->destination->width0,
118      (float)BLOCK_HEIGHT / idct->destination->height0);
119
120   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
121   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
122   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
123   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
124
125   if(calc_src_cords) {
126      o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
127      o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
128      o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
129
130      ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
131      ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
132      ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
133   }
134
135   ureg_release_temporary(shader, t_vpos);
136
137   ureg_END(shader);
138
139   return ureg_create_shader_and_destroy(shader, idct->pipe);
140}
141
142static void
143fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
144          struct ureg_src tc, struct ureg_src sampler,
145          struct ureg_src start, struct ureg_src block, float height)
146{
147   struct ureg_dst t_tc, tmp;
148   unsigned i, j;
149
150   t_tc = ureg_DECL_temporary(shader);
151   tmp = ureg_DECL_temporary(shader);
152
153   m[0] = ureg_DECL_temporary(shader);
154   m[1] = ureg_DECL_temporary(shader);
155
156   /*
157    * t_tc.x = right_side ? start.x : tc.x
158    * t_tc.y = right_side ? tc.y : start.y
159    * m[0..1].xyzw = tex(t_tc++, sampler)
160    */
161   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
162   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
163   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
164
165   for(i = 0; i < 2; ++i) {
166      for(j = 0; j < 4; ++j) {
167         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
168         ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
169         ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
170
171         if(i != 1 || j != 3) /* skip the last add */
172            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
173               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / height));
174      }
175   }
176
177   ureg_release_temporary(shader, t_tc);
178   ureg_release_temporary(shader, tmp);
179}
180
181static void
182fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
183           struct ureg_src tc, struct ureg_src sampler,
184           struct ureg_src start, bool right_side, float width)
185{
186   struct ureg_dst t_tc;
187
188   t_tc = ureg_DECL_temporary(shader);
189   m[0] = ureg_DECL_temporary(shader);
190   m[1] = ureg_DECL_temporary(shader);
191
192   /*
193    * t_tc.x = right_side ? start.x : tc.x
194    * t_tc.y = right_side ? tc.y : start.y
195    * m[0..1] = tex(t_tc++, sampler)
196    */
197   if(right_side) {
198      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y));
199      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X));
200   } else {
201      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
202      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
203   }
204
205   ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
206   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width));
207   ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
208
209   ureg_release_temporary(shader, t_tc);
210}
211
212static void
213matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
214{
215   struct ureg_dst tmp[2];
216   unsigned i;
217
218   for(i = 0; i < 2; ++i) {
219      tmp[i] = ureg_DECL_temporary(shader);
220   }
221
222   /*
223    * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
224    * dst = tmp[0] + tmp[1]
225    */
226   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
227   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
228   ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1]));
229
230   for(i = 0; i < 2; ++i) {
231      ureg_release_temporary(shader, tmp[i]);
232   }
233}
234
235static void *
236create_transpose_frag_shader(struct vl_idct *idct)
237{
238   struct pipe_resource *transpose = idct->textures.individual.transpose;
239   struct pipe_resource *intermediate = idct->textures.individual.intermediate;
240
241   struct ureg_program *shader;
242
243   struct ureg_src block, tex, sampler[2];
244   struct ureg_src start[2];
245
246   struct ureg_dst m[2][2];
247   struct ureg_dst tmp, fragment;
248
249   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
250   if (!shader)
251      return NULL;
252
253   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
254   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);
255
256   sampler[0] = ureg_DECL_sampler(shader, 0);
257   sampler[1] = ureg_DECL_sampler(shader, 1);
258
259   start[0] = ureg_imm1f(shader, 0.0f);
260   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
261
262   fetch_four(shader, m[0], block, sampler[0], start[0], false, transpose->width0);
263   fetch_one(shader, m[1], tex, sampler[1], start[1], block, intermediate->height0);
264
265   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
266
267   tmp = ureg_DECL_temporary(shader);
268   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
269   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
270
271   ureg_release_temporary(shader, tmp);
272   ureg_release_temporary(shader, m[0][0]);
273   ureg_release_temporary(shader, m[0][1]);
274   ureg_release_temporary(shader, m[1][0]);
275   ureg_release_temporary(shader, m[1][1]);
276
277   ureg_END(shader);
278
279   return ureg_create_shader_and_destroy(shader, idct->pipe);
280}
281
282static void *
283create_matrix_frag_shader(struct vl_idct *idct)
284{
285   struct pipe_resource *matrix = idct->textures.individual.matrix;
286   struct pipe_resource *source = idct->textures.individual.source;
287
288   struct ureg_program *shader;
289
290   struct ureg_src tc[2], sampler[2];
291   struct ureg_src start[2];
292
293   struct ureg_dst l[2], r[2];
294   struct ureg_dst t_tc, tmp, fragment[BLOCK_WIDTH];
295
296   unsigned i;
297
298   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
299   if (!shader)
300      return NULL;
301
302   t_tc = ureg_DECL_temporary(shader);
303   tmp = ureg_DECL_temporary(shader);
304
305   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
306   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
307
308   sampler[0] = ureg_DECL_sampler(shader, 1);
309   sampler[1] = ureg_DECL_sampler(shader, 0);
310
311   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
312   start[1] = ureg_imm1f(shader, 0.0f);
313
314   for (i = 0; i < BLOCK_WIDTH; ++i)
315       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
316
317   fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0);
318   ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
319   ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
320
321   for (i = 0; i < BLOCK_WIDTH; ++i) {
322      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
323      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0);
324      matrix_mul(shader, fragment[i], l, r);
325      ureg_release_temporary(shader, r[0]);
326      ureg_release_temporary(shader, r[1]);
327   }
328
329   ureg_release_temporary(shader, t_tc);
330   ureg_release_temporary(shader, tmp);
331   ureg_release_temporary(shader, l[0]);
332   ureg_release_temporary(shader, l[1]);
333
334   ureg_END(shader);
335
336   return ureg_create_shader_and_destroy(shader, idct->pipe);
337}
338
339static void *
340create_empty_block_frag_shader(struct vl_idct *idct)
341{
342   struct ureg_program *shader;
343   struct ureg_dst fragment;
344
345   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
346   if (!shader)
347      return NULL;
348
349   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
350
351   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
352
353   ureg_END(shader);
354
355   return ureg_create_shader_and_destroy(shader, idct->pipe);
356}
357
358static bool
359init_shaders(struct vl_idct *idct)
360{
361   idct->matrix_vs = create_vert_shader(idct, true);
362   idct->matrix_fs = create_matrix_frag_shader(idct);
363
364   idct->transpose_vs = create_vert_shader(idct, true);
365   idct->transpose_fs = create_transpose_frag_shader(idct);
366
367   idct->eb_vs = create_vert_shader(idct, false);
368   idct->eb_fs = create_empty_block_frag_shader(idct);
369
370   return
371      idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
372      idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
373      idct->eb_vs != NULL && idct->eb_fs != NULL;
374}
375
376static void
377cleanup_shaders(struct vl_idct *idct)
378{
379   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
380   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
381
382   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
383   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
384
385   idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
386   idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
387}
388
389static bool
390init_buffers(struct vl_idct *idct)
391{
392   struct pipe_resource template;
393   struct pipe_sampler_view sampler_view;
394   struct pipe_vertex_element vertex_elems[2];
395   unsigned i;
396
397   idct->max_blocks =
398      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
399      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
400      idct->destination->depth0;
401
402   memset(&template, 0, sizeof(struct pipe_resource));
403   template.last_level = 0;
404   template.depth0 = 1;
405   template.bind = PIPE_BIND_SAMPLER_VIEW;
406   template.flags = 0;
407
408   template.target = PIPE_TEXTURE_2D;
409   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
410   template.width0 = idct->destination->width0 / 4;
411   template.height0 = idct->destination->height0;
412   template.depth0 = 1;
413   template.usage = PIPE_USAGE_STREAM;
414   idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
415
416   template.target = PIPE_TEXTURE_3D;
417   template.format = PIPE_FORMAT_R16_SNORM;
418   template.width0 = idct->destination->width0 / 8;
419   template.depth0 = 8;
420   template.usage = PIPE_USAGE_STATIC;
421   idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
422
423   for (i = 0; i < 4; ++i) {
424      if(idct->textures.all[i] == NULL)
425         return false; /* a texture failed to allocate */
426
427      u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
428      idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
429   }
430
431   idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
432   idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
433   idct->vertex_bufs.individual.quad.buffer_offset = 0;
434   idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
435   (
436      idct->pipe->screen,
437      PIPE_BIND_VERTEX_BUFFER,
438      sizeof(struct vertex2f) * 4 * idct->max_blocks
439   );
440
441   if(idct->vertex_bufs.individual.quad.buffer == NULL)
442      return false;
443
444   idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
445   idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
446   idct->vertex_bufs.individual.pos.buffer_offset = 0;
447   idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
448   (
449      idct->pipe->screen,
450      PIPE_BIND_VERTEX_BUFFER,
451      sizeof(struct vertex2f) * 4 * idct->max_blocks
452   );
453
454   if(idct->vertex_bufs.individual.pos.buffer == NULL)
455      return false;
456
457   /* Rect element */
458   vertex_elems[0].src_offset = 0;
459   vertex_elems[0].instance_divisor = 0;
460   vertex_elems[0].vertex_buffer_index = 0;
461   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
462
463   /* Pos element */
464   vertex_elems[1].src_offset = 0;
465   vertex_elems[1].instance_divisor = 0;
466   vertex_elems[1].vertex_buffer_index = 1;
467   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
468
469   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
470
471   return true;
472}
473
474static void
475cleanup_buffers(struct vl_idct *idct)
476{
477   unsigned i;
478
479   assert(idct);
480
481   for (i = 0; i < 4; ++i) {
482      pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
483      pipe_resource_reference(&idct->textures.all[i], NULL);
484   }
485
486   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
487   pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL);
488   pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
489}
490
491static void
492init_constants(struct vl_idct *idct)
493{
494   struct pipe_transfer *buf_transfer;
495   struct vertex2f *v;
496
497   unsigned i;
498
499   /* quad vectors */
500   v = pipe_buffer_map
501   (
502      idct->pipe,
503      idct->vertex_bufs.individual.quad.buffer,
504      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
505      &buf_transfer
506   );
507   for ( i = 0; i < idct->max_blocks; ++i)
508     memcpy(v + i * 4, &const_quad, sizeof(const_quad));
509   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
510}
511
512static void
513init_state(struct vl_idct *idct)
514{
515   struct pipe_sampler_state sampler;
516   unsigned i;
517
518   idct->num_blocks = 0;
519   idct->num_empty_blocks = 0;
520
521   idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
522   idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
523
524   idct->viewport[1].scale[0] = idct->destination->width0;
525   idct->viewport[1].scale[1] = idct->destination->height0;
526
527   idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
528   idct->fb_state[0].height = idct->textures.individual.intermediate->height0;
529
530   idct->fb_state[0].nr_cbufs = 8;
531   for(i = 0; i < 8; ++i) {
532      idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
533         idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i,
534         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
535   }
536
537   idct->fb_state[1].width = idct->destination->width0;
538   idct->fb_state[1].height = idct->destination->height0;
539
540   idct->fb_state[1].nr_cbufs = 1;
541   idct->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
542      idct->pipe->screen, idct->destination, 0, 0, 0,
543      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
544
545   for(i = 0; i < 2; ++i) {
546      idct->viewport[i].scale[2] = 1;
547      idct->viewport[i].scale[3] = 1;
548      idct->viewport[i].translate[0] = 0;
549      idct->viewport[i].translate[1] = 0;
550      idct->viewport[i].translate[2] = 0;
551      idct->viewport[i].translate[3] = 0;
552
553      idct->fb_state[i].zsbuf = NULL;
554   }
555
556   for (i = 0; i < 4; ++i) {
557      memset(&sampler, 0, sizeof(sampler));
558      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
559      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
560      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
561      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
562      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
563      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
564      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
565      sampler.compare_func = PIPE_FUNC_ALWAYS;
566      sampler.normalized_coords = 1;
567      /*sampler.shadow_ambient = ; */
568      /*sampler.lod_bias = ; */
569      sampler.min_lod = 0;
570      /*sampler.max_lod = ; */
571      /*sampler.border_color[0] = ; */
572      /*sampler.max_anisotropy = ; */
573      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
574   }
575}
576
577static void
578cleanup_state(struct vl_idct *idct)
579{
580   unsigned i;
581
582   for(i = 0; i < 8; ++i) {
583      idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]);
584   }
585
586   idct->pipe->screen->tex_surface_destroy(idct->fb_state[1].cbufs[0]);
587
588   for (i = 0; i < 4; ++i)
589      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
590}
591
592struct pipe_resource *
593vl_idct_upload_matrix(struct pipe_context *pipe)
594{
595   struct pipe_resource template, *matrix;
596   struct pipe_transfer *buf_transfer;
597   unsigned i, j, pitch;
598   float *f;
599
600   struct pipe_box rect =
601   {
602      0, 0, 0,
603      BLOCK_WIDTH,
604      BLOCK_HEIGHT,
605      1
606   };
607
608   memset(&template, 0, sizeof(struct pipe_resource));
609   template.target = PIPE_TEXTURE_2D;
610   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
611   template.last_level = 0;
612   template.width0 = 2;
613   template.height0 = 8;
614   template.depth0 = 1;
615   template.usage = PIPE_USAGE_IMMUTABLE;
616   template.bind = PIPE_BIND_SAMPLER_VIEW;
617   template.flags = 0;
618
619   matrix = pipe->screen->resource_create(pipe->screen, &template);
620
621   /* matrix */
622   buf_transfer = pipe->get_transfer
623   (
624      pipe, matrix,
625      u_subresource(0, 0),
626      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
627      &rect
628   );
629   pitch = buf_transfer->stride / sizeof(float);
630
631   f = pipe->transfer_map(pipe, buf_transfer);
632   for(i = 0; i < BLOCK_HEIGHT; ++i)
633      for(j = 0; j < BLOCK_WIDTH; ++j)
634         f[i * pitch + j] = const_matrix[j][i]; // transpose
635
636   pipe->transfer_unmap(pipe, buf_transfer);
637   pipe->transfer_destroy(pipe, buf_transfer);
638
639   return matrix;
640}
641
642static void
643xfer_buffers_map(struct vl_idct *idct)
644{
645   struct pipe_box rect =
646   {
647      0, 0, 0,
648      idct->textures.individual.source->width0,
649      idct->textures.individual.source->height0,
650      1
651   };
652
653   idct->tex_transfer = idct->pipe->get_transfer
654   (
655      idct->pipe, idct->textures.individual.source,
656      u_subresource(0, 0),
657      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
658      &rect
659   );
660
661   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
662
663   idct->vectors = pipe_buffer_map
664   (
665      idct->pipe,
666      idct->vertex_bufs.individual.pos.buffer,
667      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
668      &idct->vec_transfer
669   );
670
671   idct->next_empty_block.l_x = ~1;
672   idct->next_empty_block.l_y = ~1;
673   idct->next_empty_block.r_x = ~1;
674   idct->next_empty_block.r_y = ~1;
675}
676
677static void
678xfer_buffers_unmap(struct vl_idct *idct)
679{
680   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
681
682   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
683   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
684}
685
686bool
687vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
688{
689   assert(idct && pipe && dst);
690
691   idct->pipe = pipe;
692   pipe_resource_reference(&idct->textures.individual.matrix, matrix);
693   pipe_resource_reference(&idct->textures.individual.transpose, matrix);
694   pipe_resource_reference(&idct->destination, dst);
695
696   if(!init_buffers(idct))
697      return false;
698
699   if(!init_shaders(idct)) {
700      cleanup_buffers(idct);
701      return false;
702   }
703
704   init_state(idct);
705
706   init_constants(idct);
707   xfer_buffers_map(idct);
708
709   return true;
710}
711
712void
713vl_idct_cleanup(struct vl_idct *idct)
714{
715   cleanup_shaders(idct);
716   cleanup_buffers(idct);
717
718   cleanup_state(idct);
719
720   pipe_resource_reference(&idct->destination, NULL);
721}
722
723static void
724flush_empty_block(struct vl_idct *idct, unsigned new_x, unsigned new_y)
725{
726   if (idct->next_empty_block.l_x == ~1 ||
727       idct->next_empty_block.l_y == ~1) {
728
729      idct->next_empty_block.l_x = new_x;
730      idct->next_empty_block.l_y = new_y;
731
732   } else if (idct->next_empty_block.r_x != (new_x - 1) ||
733              idct->next_empty_block.r_y != new_y) {
734
735      struct vertex2f l, r, *v_dst;
736
737      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
738
739      l.x = idct->next_empty_block.l_x;
740      l.y = idct->next_empty_block.l_y;
741      r.x = idct->next_empty_block.r_x;
742      r.y = idct->next_empty_block.r_y;
743      v_dst[0] = v_dst[3] = l;
744      v_dst[1] = v_dst[2] = r;
745
746      idct->next_empty_block.l_x = new_x;
747      idct->next_empty_block.l_y = new_y;
748      idct->num_empty_blocks++;
749   }
750
751   idct->next_empty_block.r_x = new_x;
752   idct->next_empty_block.r_y = new_y;
753}
754
755void
756vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
757{
758   struct vertex2f v, *v_dst;
759
760   unsigned tex_pitch;
761   short *texels;
762
763   unsigned i;
764
765   assert(idct);
766
767   if(block) {
768      tex_pitch = idct->tex_transfer->stride / sizeof(short);
769      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
770
771      for (i = 0; i < BLOCK_HEIGHT; ++i)
772         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
773
774      /* non empty blocks fills the vector buffer from left to right */
775      v_dst = idct->vectors + idct->num_blocks * 4;
776
777      idct->num_blocks++;
778
779      v.x = x;
780      v.y = y;
781
782      for (i = 0; i < 4; ++i) {
783         v_dst[i] = v;
784      }
785
786   } else {
787
788      /* while empty blocks fills the vector buffer from right to left */
789      flush_empty_block(idct, x, y);
790   }
791}
792
793void
794vl_idct_flush(struct vl_idct *idct)
795{
796   flush_empty_block(idct, ~1, ~1);
797   xfer_buffers_unmap(idct);
798
799   if(idct->num_blocks > 0) {
800
801      /* first stage */
802      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
803      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
804
805      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
806      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
807      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
808      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
809      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
810      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
811
812      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
813
814      /* second stage */
815      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
816      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
817
818      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
819      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
820      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
821      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
822      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
823      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
824
825      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
826   }
827
828   if(idct->num_empty_blocks > 0) {
829
830      /* empty block handling */
831      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
832      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
833
834      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
835      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
836      idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
837      idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
838
839      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
840         (idct->max_blocks - idct->num_empty_blocks) * 4,
841         idct->num_empty_blocks * 4);
842   }
843
844   idct->num_blocks = 0;
845   idct->num_empty_blocks = 0;
846   xfer_buffers_map(idct);
847}
848