vl_idct.c revision 3fd53e6c2a05e65872de4292557d7839cbcf7395
1/**************************************************************************
2 *
3 * Copyright 2010 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "vl_idct.h"
29#include "util/u_draw.h"
30#include <assert.h>
31#include <pipe/p_context.h>
32#include <pipe/p_screen.h>
33#include <util/u_inlines.h>
34#include <util/u_sampler.h>
35#include <util/u_format.h>
36#include <tgsi/tgsi_ureg.h>
37#include "vl_types.h"
38
/* Dimensions, in samples, of one IDCT block. */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/* Overall gain applied across both IDCT passes: 32768 / 256 = 128. */
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

/*
 * The overall gain is split between the two passes: the matrix fragment
 * shader multiplies its result by STAGE1_SCALE and the transpose fragment
 * shader by STAGE2_SCALE, so their product equals SCALE_FACTOR_16_TO_9.
 */
#define STAGE1_SCALE 4.0f
#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
46
/*
 * Vertex shader constant layout holding a single four-component vector.
 * NOTE(review): nothing in the visible part of this file references this
 * struct -- confirm whether it is still needed.
 */
struct vertex_shader_consts
{
   struct vertex4f norm;
};
51
/*
 * Vertex shader input slots, passed to ureg_DECL_vs_input() by
 * create_vert_shader().
 */
enum VS_INPUT
{
   VS_I_RECT,   /* corner of the block quad (see const_quad) */
   VS_I_VPOS,   /* block position, as written by vl_idct_add_block() */

   NUM_VS_INPUTS
};
59
/*
 * Semantic indices shared between the vertex shader outputs and the
 * fragment shader inputs declared in this file.
 */
enum VS_OUTPUT
{
   VS_O_VPOS,   /* index used with TGSI_SEMANTIC_POSITION */
   VS_O_BLOCK,  /* generic: coordinate inside the block (the quad corner) */
   VS_O_TEX,    /* generic: scaled coordinate (block position + corner) */
   VS_O_START   /* generic: scaled block position, i.e. the block origin */
};
67
/*
 * 8x8 transform coefficient table uploaded by vl_idct_upload_matrix(),
 * which transposes it while copying (it reads const_matrix[j][i]).
 *
 * The values correspond to the orthonormal DCT-II basis: row k, column n is
 * approximately c(k) / 2 * cos((2n + 1) * k * pi / 16) with c(0) = 1/sqrt(2)
 * and c(k) = 1 otherwise.
 */
static const float const_matrix[8][8] = {
   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,  0.4619400f },
   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f, -0.4157350f },
   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,  0.3535530f },
   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f, -0.2777850f },
   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,  0.1913420f },
   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
};
78
/*
 * Corner vertices of a unit quad covering one block. init_constants() copies
 * these four vertices once per block into the quad vertex buffer; the vertex
 * shader adds them to the block position.
 */
static const struct vertex2f const_quad[4] = {
   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
};
83
84static void *
85create_vert_shader(struct vl_idct *idct, bool calc_src_cords, struct pipe_resource *dst)
86{
87   struct ureg_program *shader;
88   struct ureg_src scale;
89   struct ureg_src vrect, vpos;
90   struct ureg_dst t_vpos;
91   struct ureg_dst o_vpos, o_block, o_tex, o_start;
92
93   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
94   if (!shader)
95      return NULL;
96
97   t_vpos = ureg_DECL_temporary(shader);
98
99   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
100   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
101
102   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
103
104   /*
105    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
106    *
107    * t_vpos = vpos + vrect
108    * o_vpos.xy = t_vpos * scale
109    * o_vpos.zw = vpos
110    *
111    * o_block = vrect
112    * o_tex = t_pos
113    * o_start = vpos * scale
114    *
115    */
116   scale = ureg_imm2f(shader,
117      (float)BLOCK_WIDTH / dst->width0,
118      (float)BLOCK_HEIGHT / dst->height0);
119
120   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
121   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
122   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
123   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
124
125   if(calc_src_cords) {
126      o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
127      o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
128      o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
129
130      ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
131      ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
132      ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
133   }
134
135   ureg_release_temporary(shader, t_vpos);
136
137   ureg_END(shader);
138
139   return ureg_create_shader_and_destroy(shader, idct->pipe);
140}
141
142static void
143fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
144          struct ureg_src tc, struct ureg_src sampler,
145          struct ureg_src start, bool right_side, float size)
146{
147   struct ureg_dst t_tc, tmp;
148   unsigned i, j;
149
150   t_tc = ureg_DECL_temporary(shader);
151   tmp = ureg_DECL_temporary(shader);
152
153   m[0] = ureg_DECL_temporary(shader);
154   m[1] = ureg_DECL_temporary(shader);
155
156   /*
157    * t_tc.x = right_side ? start.x : tc.x
158    * t_tc.y = right_side ? tc.y : start.y
159    * m[0..1].xyzw = tex(t_tc++, sampler)
160    */
161   if(right_side) {
162      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
163      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
164   } else {
165      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
166      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
167   }
168   for(i = 0; i < 2; ++i) {
169      for(j = 0; j < 4; ++j) {
170         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
171         ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
172         ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
173
174         if(i != 1 || j != 3) /* skip the last add */
175            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X << right_side),
176               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
177      }
178   }
179
180   ureg_release_temporary(shader, t_tc);
181   ureg_release_temporary(shader, tmp);
182}
183
184static void
185fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
186           struct ureg_src tc, struct ureg_src sampler,
187           struct ureg_src start, bool right_side, float width)
188{
189   struct ureg_dst t_tc;
190
191   t_tc = ureg_DECL_temporary(shader);
192   m[0] = ureg_DECL_temporary(shader);
193   m[1] = ureg_DECL_temporary(shader);
194
195   /*
196    * t_tc.x = right_side ? start.x : tc.x
197    * t_tc.y = right_side ? tc.y : start.y
198    * m[0..1] = tex(t_tc++, sampler)
199    */
200   if(right_side) {
201      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y));
202      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X));
203   } else {
204      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
205      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
206   }
207
208   ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
209   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width));
210   ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
211
212   ureg_release_temporary(shader, t_tc);
213}
214
215static void
216matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
217{
218   struct ureg_dst tmp[2];
219   unsigned i;
220
221   for(i = 0; i < 2; ++i) {
222      tmp[i] = ureg_DECL_temporary(shader);
223   }
224
225   /*
226    * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
227    * dst = tmp[0] + tmp[1]
228    */
229   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
230   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
231   ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1]));
232
233   for(i = 0; i < 2; ++i) {
234      ureg_release_temporary(shader, tmp[i]);
235   }
236}
237
238static void *
239create_transpose_frag_shader(struct vl_idct *idct)
240{
241   struct pipe_resource *transpose = idct->textures.individual.transpose;
242   struct pipe_resource *intermediate = idct->textures.individual.intermediate;
243
244   struct ureg_program *shader;
245
246   struct ureg_src tc[2], sampler[2];
247   struct ureg_src start[2];
248
249   struct ureg_dst m[2][2];
250   struct ureg_dst tmp, fragment;
251
252   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
253   if (!shader)
254      return NULL;
255
256   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
257   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
258
259   sampler[0] = ureg_DECL_sampler(shader, 0);
260   sampler[1] = ureg_DECL_sampler(shader, 1);
261
262   start[0] = ureg_imm1f(shader, 0.0f);
263   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
264
265   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, transpose->width0);
266   fetch_one(shader, m[1], tc[1], sampler[1], start[1], true, intermediate->height0);
267
268   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
269
270   tmp = ureg_DECL_temporary(shader);
271   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
272   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
273
274   ureg_release_temporary(shader, tmp);
275   ureg_release_temporary(shader, m[0][0]);
276   ureg_release_temporary(shader, m[0][1]);
277   ureg_release_temporary(shader, m[1][0]);
278   ureg_release_temporary(shader, m[1][1]);
279
280   ureg_END(shader);
281
282   return ureg_create_shader_and_destroy(shader, idct->pipe);
283}
284
285static void *
286create_matrix_frag_shader(struct vl_idct *idct)
287{
288   struct pipe_resource *matrix = idct->textures.individual.matrix;
289   struct pipe_resource *source = idct->textures.individual.source;
290
291   struct ureg_program *shader;
292
293   struct ureg_src tc[2], sampler[2];
294   struct ureg_src start[2];
295
296   struct ureg_dst l[2], r[2];
297   struct ureg_dst t_tc, tmp, fragment;
298
299   //unsigned i;
300
301   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
302   if (!shader)
303      return NULL;
304
305   t_tc = ureg_DECL_temporary(shader);
306   tmp = ureg_DECL_temporary(shader);
307
308   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
309   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
310
311   sampler[0] = ureg_DECL_sampler(shader, 1);
312   sampler[1] = ureg_DECL_sampler(shader, 0);
313
314   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
315   start[1] = ureg_imm1f(shader, 0.0f);
316
317   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
318
319   fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0);
320
321   ureg_MOV(shader, t_tc, tc[0]);
322   //for (i = 0; i < 4; ++i) {
323
324      fetch_four(shader, l, ureg_src(t_tc), sampler[0], start[0], false, source->width0);
325      matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X /* << i */), l, r);
326      ureg_release_temporary(shader, l[0]);
327      ureg_release_temporary(shader, l[1]);
328
329   //   if (i != 3)
330   //      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
331   //         ureg_src(t_tc), ureg_imm1f(shader, 1.0f / source->height0));
332   //}
333
334   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
335
336   ureg_release_temporary(shader, t_tc);
337   ureg_release_temporary(shader, tmp);
338   ureg_release_temporary(shader, r[0]);
339   ureg_release_temporary(shader, r[1]);
340
341   ureg_END(shader);
342
343   return ureg_create_shader_and_destroy(shader, idct->pipe);
344}
345
346static void *
347create_empty_block_frag_shader(struct vl_idct *idct)
348{
349   struct ureg_program *shader;
350   struct ureg_dst fragment;
351
352   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
353   if (!shader)
354      return NULL;
355
356   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
357
358   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
359
360   ureg_END(shader);
361
362   return ureg_create_shader_and_destroy(shader, idct->pipe);
363}
364
365static void
366xfer_buffers_map(struct vl_idct *idct)
367{
368   struct pipe_box rect =
369   {
370      0, 0, 0,
371      idct->textures.individual.source->width0,
372      idct->textures.individual.source->height0,
373      1
374   };
375
376   idct->tex_transfer = idct->pipe->get_transfer
377   (
378      idct->pipe, idct->textures.individual.source,
379      u_subresource(0, 0),
380      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
381      &rect
382   );
383
384   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
385
386   idct->vectors = pipe_buffer_map
387   (
388      idct->pipe,
389      idct->vertex_bufs.individual.pos.buffer,
390      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
391      &idct->vec_transfer
392   );
393}
394
395static void
396xfer_buffers_unmap(struct vl_idct *idct)
397{
398   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
399
400   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
401   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
402}
403
404static bool
405init_shaders(struct vl_idct *idct)
406{
407   idct->matrix_vs = create_vert_shader(idct, true, idct->textures.individual.intermediate);
408   idct->matrix_fs = create_matrix_frag_shader(idct);
409
410   idct->transpose_vs = create_vert_shader(idct, true, idct->destination);
411   idct->transpose_fs = create_transpose_frag_shader(idct);
412
413   idct->eb_vs = create_vert_shader(idct, false, idct->destination);
414   idct->eb_fs = create_empty_block_frag_shader(idct);
415
416   return
417      idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
418      idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
419      idct->eb_vs != NULL && idct->eb_fs != NULL;
420}
421
422static void
423cleanup_shaders(struct vl_idct *idct)
424{
425   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
426   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
427
428   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
429   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
430
431   idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
432   idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
433}
434
435static bool
436init_buffers(struct vl_idct *idct)
437{
438   struct pipe_resource template;
439   struct pipe_sampler_view sampler_view;
440   struct pipe_vertex_element vertex_elems[2];
441   unsigned i;
442
443   idct->max_blocks =
444      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
445      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
446      idct->destination->depth0;
447
448   memset(&template, 0, sizeof(struct pipe_resource));
449   template.target = PIPE_TEXTURE_2D;
450   template.last_level = 0;
451   template.depth0 = 1;
452   template.bind = PIPE_BIND_SAMPLER_VIEW;
453   template.flags = 0;
454   template.depth0 = 1;
455
456   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
457   template.width0 = idct->destination->width0 / 4;
458   template.height0 = idct->destination->height0;
459   template.usage = PIPE_USAGE_STREAM;
460   idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
461
462   template.format = PIPE_FORMAT_R16_SNORM;
463   template.width0 = idct->destination->width0;
464   template.usage = PIPE_USAGE_STATIC;
465   idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
466
467   for (i = 0; i < 4; ++i) {
468      if(idct->textures.all[i] == NULL)
469         return false; /* a texture failed to allocate */
470
471      u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
472      idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
473   }
474
475   idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
476   idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
477   idct->vertex_bufs.individual.quad.buffer_offset = 0;
478   idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
479   (
480      idct->pipe->screen,
481      PIPE_BIND_VERTEX_BUFFER,
482      sizeof(struct vertex2f) * 4 * idct->max_blocks
483   );
484
485   if(idct->vertex_bufs.individual.quad.buffer == NULL)
486      return false;
487
488   idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
489   idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
490   idct->vertex_bufs.individual.pos.buffer_offset = 0;
491   idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
492   (
493      idct->pipe->screen,
494      PIPE_BIND_VERTEX_BUFFER,
495      sizeof(struct vertex2f) * 4 * idct->max_blocks
496   );
497
498   if(idct->vertex_bufs.individual.pos.buffer == NULL)
499      return false;
500
501   /* Rect element */
502   vertex_elems[0].src_offset = 0;
503   vertex_elems[0].instance_divisor = 0;
504   vertex_elems[0].vertex_buffer_index = 0;
505   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
506
507   /* Pos element */
508   vertex_elems[1].src_offset = 0;
509   vertex_elems[1].instance_divisor = 0;
510   vertex_elems[1].vertex_buffer_index = 1;
511   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
512
513   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
514
515   return true;
516}
517
518static void
519cleanup_buffers(struct vl_idct *idct)
520{
521   unsigned i;
522
523   assert(idct);
524
525   for (i = 0; i < 4; ++i) {
526      pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
527      pipe_resource_reference(&idct->textures.all[i], NULL);
528   }
529
530   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
531   pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL);
532   pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
533}
534
535static void
536init_constants(struct vl_idct *idct)
537{
538   struct pipe_transfer *buf_transfer;
539   struct vertex2f *v;
540
541   unsigned i;
542
543   /* quad vectors */
544   v = pipe_buffer_map
545   (
546      idct->pipe,
547      idct->vertex_bufs.individual.quad.buffer,
548      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
549      &buf_transfer
550   );
551   for ( i = 0; i < idct->max_blocks; ++i)
552     memcpy(v + i * 4, &const_quad, sizeof(const_quad));
553   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
554}
555
556static void
557init_state(struct vl_idct *idct)
558{
559   struct pipe_sampler_state sampler;
560   unsigned i;
561
562   idct->num_blocks = 0;
563   idct->num_empty_blocks = 0;
564
565   idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
566   idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
567
568   idct->viewport[1].scale[0] = idct->destination->width0;
569   idct->viewport[1].scale[1] = idct->destination->height0;
570
571   idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
572   idct->fb_state[0].height = idct->textures.individual.intermediate->height0;
573
574   idct->fb_state[1].width = idct->destination->width0;
575   idct->fb_state[1].height = idct->destination->height0;
576
577   for(i = 0; i < 2; ++i) {
578      idct->viewport[i].scale[2] = 1;
579      idct->viewport[i].scale[3] = 1;
580      idct->viewport[i].translate[0] = 0;
581      idct->viewport[i].translate[1] = 0;
582      idct->viewport[i].translate[2] = 0;
583      idct->viewport[i].translate[3] = 0;
584
585      idct->fb_state[i].nr_cbufs = 1;
586      idct->fb_state[i].zsbuf = NULL;
587   }
588
589   for (i = 0; i < 4; ++i) {
590      memset(&sampler, 0, sizeof(sampler));
591      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
592      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
593      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
594      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
595      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
596      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
597      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
598      sampler.compare_func = PIPE_FUNC_ALWAYS;
599      sampler.normalized_coords = 1;
600      /*sampler.shadow_ambient = ; */
601      /*sampler.lod_bias = ; */
602      sampler.min_lod = 0;
603      /*sampler.max_lod = ; */
604      /*sampler.border_color[0] = ; */
605      /*sampler.max_anisotropy = ; */
606      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
607   }
608}
609
610static void
611cleanup_state(struct vl_idct *idct)
612{
613   unsigned i;
614
615   for (i = 0; i < 4; ++i)
616      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
617}
618
619struct pipe_resource *
620vl_idct_upload_matrix(struct pipe_context *pipe)
621{
622   struct pipe_resource template, *matrix;
623   struct pipe_transfer *buf_transfer;
624   unsigned i, j, pitch;
625   float *f;
626
627   struct pipe_box rect =
628   {
629      0, 0, 0,
630      BLOCK_WIDTH,
631      BLOCK_HEIGHT,
632      1
633   };
634
635   memset(&template, 0, sizeof(struct pipe_resource));
636   template.target = PIPE_TEXTURE_2D;
637   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
638   template.last_level = 0;
639   template.width0 = 2;
640   template.height0 = 8;
641   template.depth0 = 1;
642   template.usage = PIPE_USAGE_IMMUTABLE;
643   template.bind = PIPE_BIND_SAMPLER_VIEW;
644   template.flags = 0;
645
646   matrix = pipe->screen->resource_create(pipe->screen, &template);
647
648   /* matrix */
649   buf_transfer = pipe->get_transfer
650   (
651      pipe, matrix,
652      u_subresource(0, 0),
653      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
654      &rect
655   );
656   pitch = buf_transfer->stride / sizeof(float);
657
658   f = pipe->transfer_map(pipe, buf_transfer);
659   for(i = 0; i < BLOCK_HEIGHT; ++i)
660      for(j = 0; j < BLOCK_WIDTH; ++j)
661         f[i * pitch + j] = const_matrix[j][i]; // transpose
662
663   pipe->transfer_unmap(pipe, buf_transfer);
664   pipe->transfer_destroy(pipe, buf_transfer);
665
666   return matrix;
667}
668
669bool
670vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
671{
672   assert(idct && pipe && dst);
673
674   idct->pipe = pipe;
675   pipe_resource_reference(&idct->textures.individual.matrix, matrix);
676   pipe_resource_reference(&idct->textures.individual.transpose, matrix);
677   pipe_resource_reference(&idct->destination, dst);
678
679   if(!init_buffers(idct))
680      return false;
681
682   if(!init_shaders(idct)) {
683      cleanup_buffers(idct);
684      return false;
685   }
686
687   init_state(idct);
688
689   idct->surfaces.intermediate = idct->pipe->screen->get_tex_surface(
690      idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, 0,
691      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
692   idct->fb_state[0].cbufs[0] = idct->surfaces.intermediate;
693
694   idct->surfaces.destination = idct->pipe->screen->get_tex_surface(
695      idct->pipe->screen, idct->destination, 0, 0, 0,
696      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
697   idct->fb_state[1].cbufs[0] = idct->surfaces.destination;
698
699   init_constants(idct);
700   xfer_buffers_map(idct);
701
702   return true;
703}
704
705void
706vl_idct_cleanup(struct vl_idct *idct)
707{
708   idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination);
709   idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate);
710
711   cleanup_shaders(idct);
712   cleanup_buffers(idct);
713
714   cleanup_state(idct);
715
716   pipe_resource_reference(&idct->destination, NULL);
717}
718
719void
720vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
721{
722   struct vertex2f v, *v_dst;
723
724   unsigned tex_pitch;
725   unsigned nr_components;
726   short *texels;
727
728   unsigned i;
729
730   assert(idct);
731
732   if(block) {
733      nr_components = util_format_get_nr_components(idct->tex_transfer->resource->format);
734
735      tex_pitch = idct->tex_transfer->stride / sizeof(short);
736      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
737
738      for (i = 0; i < BLOCK_HEIGHT; ++i)
739         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
740
741      /* non empty blocks fills the vector buffer from left to right */
742      v_dst = idct->vectors + idct->num_blocks * 4;
743
744      idct->num_blocks++;
745
746   } else {
747
748      /* while empty blocks fills the vector buffer from right to left */
749      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
750
751      idct->num_empty_blocks++;
752   }
753
754   v.x = x;
755   v.y = y;
756
757   for (i = 0; i < 4; ++i) {
758      v_dst[i] = v;
759   }
760}
761
762void
763vl_idct_flush(struct vl_idct *idct)
764{
765   xfer_buffers_unmap(idct);
766
767   if(idct->num_blocks > 0) {
768
769      /* first stage */
770      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
771      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
772
773      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
774      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
775      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
776      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
777      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
778      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
779
780      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
781
782      /* second stage */
783      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
784      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
785
786      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
787      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
788      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
789      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
790      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
791      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
792
793      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
794   }
795
796   if(idct->num_empty_blocks > 0) {
797
798      /* empty block handling */
799      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
800      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
801
802      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
803      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
804      idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
805      idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
806
807      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
808         (idct->max_blocks - idct->num_empty_blocks) * 4,
809         idct->num_empty_blocks * 4);
810   }
811
812   idct->num_blocks = 0;
813   idct->num_empty_blocks = 0;
814   xfer_buffers_map(idct);
815}
816