vl_idct.c revision ab130400cf91ab471e265e58193c95f04c7aeeda
/**************************************************************************
 *
 * Copyright 2010 Christian König
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
2795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
2895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "vl_idct.h"
2995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "vl_vertex_buffers.h"
3095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "util/u_draw.h"
3195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <assert.h>
3295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <pipe/p_context.h>
3395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <pipe/p_screen.h>
3495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <util/u_inlines.h>
3595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <util/u_sampler.h>
3695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <util/u_format.h>
3795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <tgsi/tgsi_ureg.h>
3895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "vl_types.h"
3995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/* Edge lengths of one IDCT block, used as coordinate scale factors below. */
#define BLOCK_WIDTH  8
#define BLOCK_HEIGHT 8

/* 32768 / 256 == 128. */
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

/*
 * Per-stage scale factors.  STAGE2_SCALE divides the overall factor by
 * STAGE1_SCALE twice, i.e. 128 / 4 / 4 == 8.
 */
#define STAGE1_SCALE 4.0f
#define STAGE2_SCALE ((SCALE_FACTOR_16_TO_9 / STAGE1_SCALE) / STAGE1_SCALE)

/* Number of colour outputs declared by the matrix fragment shader. */
#define NR_RENDER_TARGETS 4
4995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/*
 * Vertex shader input slots.  The values double as indices into
 * per-input arrays, and NUM_VS_INPUTS is the number of slots.
 */
enum VS_INPUT
{
   VS_I_RECT = 0,
   VS_I_VPOS = 1,

   NUM_VS_INPUTS = 2
};
5795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/*
 * Semantic indices of the vertex shader outputs.  The fragment shaders
 * declare their inputs with the same indices to pick up these values.
 */
enum VS_OUTPUT
{
   VS_O_VPOS  = 0,
   VS_O_BLOCK = 1,
   VS_O_TEX   = 2,
   VS_O_START = 3
};
6595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/*
 * 8x8 table of single-precision coefficients.
 *
 * NOTE(review): the values look like an orthonormal 8-point DCT basis
 * (row 0 is ~1/(2*sqrt(2))); the consumer of this table is not visible
 * here - confirm.  A few entries differ from their neighbours in the last
 * printed digit (e.g. 0.353554, -0.191341, 0.277786); they are reproduced
 * exactly as found.
 */
static const float const_matrix[8][8] = {
   /* row 0 */
   {  0.353553f,   0.353553f,   0.353553f,   0.353553f,   0.353553f,   0.353553f,   0.353553f,   0.353553f  },
   /* row 1 */
   {  0.490393f,   0.415735f,   0.277785f,   0.0975451f, -0.0975452f, -0.277785f,  -0.415735f,  -0.490393f  },
   /* row 2 */
   {  0.461940f,   0.191342f,  -0.191342f,  -0.461940f,  -0.461940f,  -0.191342f,   0.191342f,   0.461940f  },
   /* row 3 */
   {  0.415735f,  -0.0975452f, -0.490393f,  -0.277785f,   0.277785f,   0.490393f,   0.097545f,  -0.415735f  },
   /* row 4 */
   {  0.353553f,  -0.353553f,  -0.353553f,   0.353554f,   0.353553f,  -0.353554f,  -0.353553f,   0.353553f  },
   /* row 5 */
   {  0.277785f,  -0.490393f,   0.0975452f,  0.415735f,  -0.415735f,  -0.0975451f,  0.490393f,  -0.277785f  },
   /* row 6 */
   {  0.191342f,  -0.461940f,   0.461940f,  -0.191342f,  -0.191341f,   0.461940f,  -0.461940f,   0.191342f  },
   /* row 7 */
   {  0.0975451f, -0.277785f,   0.415735f,  -0.490393f,   0.490393f,  -0.415735f,   0.277786f,  -0.0975458f }
};
7695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
7795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void *
7895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleycreate_vert_shader(struct vl_idct *idct)
7995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
8095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_program *shader;
8195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_src scale;
8295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_src vrect, vpos;
8395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst t_vpos;
8495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst o_vpos, o_block, o_tex, o_start;
8595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
8695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
8795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if (!shader)
8895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return NULL;
8995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
9095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   t_vpos = ureg_DECL_temporary(shader);
9195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
9295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
9395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
9495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
9595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
9695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
9795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
9895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
9995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
10095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   /*
10195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
10295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    *
10395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * t_vpos = vpos + vrect
10495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * o_vpos.xy = t_vpos * scale
10595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * o_vpos.zw = vpos
10695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    *
10795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * o_block = vrect
10895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * o_tex = t_pos
10995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * o_start = vpos * scale
11095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    *
11195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    */
11295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   scale = ureg_imm2f(shader,
11395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      (float)BLOCK_WIDTH / idct->buffer_width,
11495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      (float)BLOCK_HEIGHT / idct->buffer_height);
11595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
11695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
11795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
11895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
11995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
12095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
12195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
12295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
12395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
12430ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
12530ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin#if NR_RENDER_TARGETS == 1
12630ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
12730ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin#else
12830ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   ureg_MUL(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z),
12930ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      ureg_scalar(vrect, TGSI_SWIZZLE_X),
13030ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
13195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
13295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
13395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
13495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
13595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, t_vpos);
13695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
13795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_END(shader);
13895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
13995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return ureg_create_shader_and_destroy(shader, idct->pipe);
14095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
14195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
14295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void
14395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyfetch_four(struct ureg_program *shader, struct ureg_dst m[2],
14495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley           struct ureg_src tc, struct ureg_src sampler,
14595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley           struct ureg_src start, bool right_side,
14695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley           bool transposed, float size)
14795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
14895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst t_tc;
14995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
15095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
15195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
15295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   t_tc = ureg_DECL_temporary(shader);
15395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   m[0] = ureg_DECL_temporary(shader);
15495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   m[1] = ureg_DECL_temporary(shader);
15595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
15695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   /*
15795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * t_tc.x = right_side ? start.x : tc.x
15895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * t_tc.y = right_side ? tc.y : start.y
15995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * m[0..1] = tex(t_tc++, sampler)
16095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    */
16195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if(!right_side) {
16295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_X));
16395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_Y));
16495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   } else {
16595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
16695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
16795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
16895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), tc);
16995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
17095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
17195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_ADD(shader, ureg_writemask(t_tc, wm_start), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
17295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
17395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
17495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, t_tc);
17595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
17695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
17795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void
17895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleymatrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
17995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
18095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst tmp[2];
18195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   unsigned i;
18295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
18395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for(i = 0; i < 2; ++i) {
18495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      tmp[i] = ureg_DECL_temporary(shader);
18595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
18695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
18795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   /*
18895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
18995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    * dst = tmp[0] + tmp[1]
19095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    */
19195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
19295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
19395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_ADD(shader, dst,
19495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X),
19595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
19695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
19795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for(i = 0; i < 2; ++i) {
19895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_release_temporary(shader, tmp[i]);
19995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
20095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
20195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
20295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void *
20395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleycreate_transpose_frag_shader(struct vl_idct *idct)
20495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
20595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_program *shader;
20695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
20795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_src block, tex, sampler[2];
20895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_src start[2];
20995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
21095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst l[2], r[2];
21195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst tmp, fragment;
21295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
21395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
21495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if (!shader)
21595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return NULL;
21695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
21795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
21895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);
21995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
22095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   sampler[0] = ureg_DECL_sampler(shader, 0);
22195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   sampler[1] = ureg_DECL_sampler(shader, 1);
22295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
22395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   start[0] = ureg_imm1f(shader, 0.0f);
22495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
22595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
22695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   fetch_four(shader, l, block, sampler[0], start[0], false, false, BLOCK_WIDTH / 4);
22795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   fetch_four(shader, r, tex, sampler[1], start[1], true, false, idct->buffer_height / 4);
22895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
22995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
23095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
23195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   tmp = ureg_DECL_temporary(shader);
23295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
23395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
23495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
23595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, tmp);
23695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, l[0]);
23795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, l[1]);
23895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, r[0]);
23995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, r[1]);
24095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
24195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_END(shader);
24295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
24395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return ureg_create_shader_and_destroy(shader, idct->pipe);
24495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
24595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
24695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void *
24795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleycreate_matrix_frag_shader(struct vl_idct *idct)
24895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
24995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_program *shader;
25095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
25195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_src tex, block, sampler[2];
25295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_src start[2];
25395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
25495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst l[4][2], r[2];
25595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct ureg_dst t_tc, tmp, fragment[NR_RENDER_TARGETS];
25695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
25795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   unsigned i, j;
25895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
25995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
26095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if (!shader)
26195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return NULL;
26295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
26395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   t_tc = ureg_DECL_temporary(shader);
26495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   tmp = ureg_DECL_temporary(shader);
26595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
26695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
26795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
2680eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
26995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   sampler[0] = ureg_DECL_sampler(shader, 1);
27095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   sampler[1] = ureg_DECL_sampler(shader, 0);
27195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
27295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
27395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   start[1] = ureg_imm1f(shader, 0.0f);
27495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
27595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for (i = 0; i < NR_RENDER_TARGETS; ++i)
27695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
27795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
27895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for (i = 0; i < 4; ++i) {
27995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if(i == 0)
28095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
28195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      else
2827f520dbd8d4e0006f6c8279c681ee149c961c104David Benjamin         ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
28395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley            ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
28495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
28595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], false, false, idct->buffer_width / 4);
28695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
28795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
28895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
28995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
29095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if NR_RENDER_TARGETS == 1
29195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      fetch_four(shader, r, block, sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
29295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
29395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X),
29495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley         ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i),
29595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley         block);
29695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
29730ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin#endif
29830ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin
29995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (j = 0; j < 4; ++j) {
30030ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
30130ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      }
30230ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      ureg_release_temporary(shader, r[0]);
30330ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      ureg_release_temporary(shader, r[1]);
30430ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   }
30530ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin
30630ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   ureg_release_temporary(shader, t_tc);
30795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_release_temporary(shader, tmp);
30895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
30995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for (i = 0; i < 4; ++i) {
31095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_release_temporary(shader, l[i][0]);
31195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ureg_release_temporary(shader, l[i][1]);
31295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
31395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
31495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   ureg_END(shader);
31595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
31695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return ureg_create_shader_and_destroy(shader, idct->pipe);
31795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
31895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
31995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic bool
32095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyinit_shaders(struct vl_idct *idct)
32195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
32295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->vs = create_vert_shader(idct);
32395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->matrix_fs = create_matrix_frag_shader(idct);
32495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->transpose_fs = create_transpose_frag_shader(idct);
32595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
32695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return
32795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      idct->vs != NULL &&
32895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      idct->transpose_fs != NULL &&
32995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      idct->matrix_fs != NULL;
33095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
33195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
33295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void
33395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleycleanup_shaders(struct vl_idct *idct)
33495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
33595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
33695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
33795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
33895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
33995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
34095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic bool
34195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyinit_state(struct vl_idct *idct)
34295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
34395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
34495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_sampler_state sampler;
34595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_rasterizer_state rs_state;
3460eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   unsigned i;
34795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
3480eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   assert(idct);
3490eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
3500eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   idct->quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
3510eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
3520eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   if(idct->quad.buffer == NULL)
35395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return false;
3540eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
3550eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   for (i = 0; i < 4; ++i) {
35695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      memset(&sampler, 0, sizeof(sampler));
3570eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3580eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3590eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
36095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
36130ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
36230ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
36330ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
36430ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      sampler.compare_func = PIPE_FUNC_ALWAYS;
36530ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      sampler.normalized_coords = 1;
36630ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      /*sampler.shadow_ambient = ; */
36730ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      /*sampler.lod_bias = ; */
36830ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      sampler.min_lod = 0;
36930ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      /*sampler.max_lod = ; */
37030ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      /*sampler.border_color[0] = ; */
37130ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      /*sampler.max_anisotropy = ; */
37230ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
37330ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   }
37430ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin
37530ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   memset(&rs_state, 0, sizeof(rs_state));
37630ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   /*rs_state.sprite_coord_enable */
37795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
3780eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   rs_state.point_quad_rasterization = true;
37939482a13aca033b72118807fadd152207e0fad8aDavid Benjamin   rs_state.point_size = BLOCK_WIDTH;
3800eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   rs_state.gl_rasterization_rules = false;
3810eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
3820eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
3830eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
3840eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
3850eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   /* Pos element */
3860eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
38795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
3880eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
3890eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
3900eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
39195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return true;
3920eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin}
3930eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
3940eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjaminstatic void
3950eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamincleanup_state(struct vl_idct *idct)
3960eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin{
3970eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   unsigned i;
398b0c235ed366d10674542db784668fe3e13f23709Adam Langley
3990eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   for (i = 0; i < 4; ++i)
4000eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
4010eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
4020eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
4030eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
4040eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin}
4050eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
4060eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjaminstatic bool
40795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyinit_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
4080eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin{
40995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_resource template;
4100eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   struct pipe_sampler_view sampler_view;
4110eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   unsigned i;
4120eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
4130eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   assert(idct && buffer);
41495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
4150eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   /* create textures */
41695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   memset(&template, 0, sizeof(struct pipe_resource));
4170eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.last_level = 0;
4180eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.depth0 = 1;
4190eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.bind = PIPE_BIND_SAMPLER_VIEW;
4200eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.flags = 0;
42195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
42295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.target = PIPE_TEXTURE_2D;
4230eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
4240eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.width0 = idct->buffer_width / 4;
4250eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.height0 = idct->buffer_height;
4260eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.depth0 = 1;
4270eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.usage = PIPE_USAGE_STREAM;
4280eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
4290eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
4300eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.target = PIPE_TEXTURE_3D;
4310eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
4320eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.width0 = idct->buffer_width / NR_RENDER_TARGETS;
4330eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.height0 = idct->buffer_height / 4;
4340eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.depth0 = NR_RENDER_TARGETS;
4350eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   template.usage = PIPE_USAGE_STATIC;
4360eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
4370eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
4380eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   for (i = 0; i < 4; ++i) {
4390eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin      if(buffer->textures.all[i] == NULL)
4400eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin         return false; /* a texture failed to allocate */
4410eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
44295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format);
44395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view);
44495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
44595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
44695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return true;
44795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
44895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
44995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void
45095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleycleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
45195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
45295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   unsigned i;
4530eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin
4540eb17906ab03bd3f8d2e83bc08abf94caf0ff489David Benjamin   assert(idct && buffer);
45595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
45695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for (i = 0; i < 4; ++i) {
45795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
45895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      pipe_resource_reference(&buffer->textures.all[i], NULL);
45995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
46095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
46195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
46295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic bool
46395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyinit_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
46495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
46595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   assert(idct && buffer);
46695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
46795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   buffer->vertex_bufs.individual.quad.stride = idct->quad.stride;
46895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   buffer->vertex_bufs.individual.quad.max_index = idct->quad.max_index;
46995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset;
47095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer);
47195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
47295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   buffer->vertex_bufs.individual.pos = vl_vb_init(
47395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      &buffer->blocks, idct->pipe, idct->max_blocks, 2,
47495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      idct->vertex_buffer_stride);
47595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
47695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if(buffer->vertex_bufs.individual.pos.buffer == NULL)
47795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return false;
47895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
47995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return true;
48095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
48195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
48295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void
48395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleycleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
48495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
48595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   assert(idct && buffer);
48695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
48795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
48895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe_resource_reference(&buffer->vertex_bufs.individual.pos.buffer, NULL);
48995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
49095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   vl_vb_cleanup(&buffer->blocks);
49195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
49295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
49395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystruct pipe_resource *
49495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvl_idct_upload_matrix(struct pipe_context *pipe)
49595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
49695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_resource template, *matrix;
49795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_transfer *buf_transfer;
49895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   unsigned i, j, pitch;
49995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   float *f;
50095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
50195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   struct pipe_box rect =
50295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   {
50395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      0, 0, 0,
50495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      BLOCK_WIDTH / 4,
50595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      BLOCK_HEIGHT,
50695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      1
50795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   };
50895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
50995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   memset(&template, 0, sizeof(struct pipe_resource));
51095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.target = PIPE_TEXTURE_2D;
51195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
51295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.last_level = 0;
51395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.width0 = 2;
51495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.height0 = 8;
51595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.depth0 = 1;
51695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.usage = PIPE_USAGE_IMMUTABLE;
51795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.bind = PIPE_BIND_SAMPLER_VIEW;
51895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   template.flags = 0;
51995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
52095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   matrix = pipe->screen->resource_create(pipe->screen, &template);
52195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
52295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   /* matrix */
52395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   buf_transfer = pipe->get_transfer
52495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   (
52595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      pipe, matrix,
52695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      u_subresource(0, 0),
52795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
52895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      &rect
52995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   );
53095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pitch = buf_transfer->stride / sizeof(float);
53195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
53295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   f = pipe->transfer_map(pipe, buf_transfer);
53395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   for(i = 0; i < BLOCK_HEIGHT; ++i)
53495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for(j = 0; j < BLOCK_WIDTH; ++j)
53595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley         // transpose and scale
53695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley         f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE;
53795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
53895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe->transfer_unmap(pipe, buf_transfer);
53995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe->transfer_destroy(pipe, buf_transfer);
54095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
54195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return matrix;
54295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
54395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
54495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleybool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
54595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                  unsigned buffer_width, unsigned buffer_height,
54695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                  struct pipe_resource *matrix)
54795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
54895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   assert(idct && pipe && matrix);
54995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
55095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->pipe = pipe;
55195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->buffer_width = buffer_width;
55295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->buffer_height = buffer_height;
55395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe_resource_reference(&idct->matrix, matrix);
55495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
55595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   idct->max_blocks =
55695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      align(buffer_width, BLOCK_WIDTH) / BLOCK_WIDTH *
55795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      align(buffer_height, BLOCK_HEIGHT) / BLOCK_HEIGHT;
55895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
55995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if(!init_shaders(idct))
56095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return false;
56195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
56295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   if(!init_state(idct)) {
56395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      cleanup_shaders(idct);
56495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return false;
56595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   }
56695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
56795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   return true;
56895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
56995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
57095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid
57195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvl_idct_cleanup(struct vl_idct *idct)
57295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
57330ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   cleanup_shaders(idct);
57430ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin   cleanup_state(idct);
57530ddb434bfb845356fbacb6b2bd51f8814c7043cDavid Benjamin
57695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley   pipe_resource_reference(&idct->matrix, NULL);
57795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
57895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
57995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleybool
58095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst)
58195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
582   unsigned i;
583
584   assert(buffer);
585   assert(idct);
586   assert(dst);
587
588   pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix);
589   pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix);
590   pipe_resource_reference(&buffer->destination, dst);
591
592   if (!init_textures(idct, buffer))
593      return false;
594
595   if (!init_vertex_buffers(idct, buffer))
596      return false;
597
598   /* init state */
599   buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
600   buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0;
601
602   buffer->viewport[1].scale[0] = buffer->destination->width0;
603   buffer->viewport[1].scale[1] = buffer->destination->height0;
604
605   buffer->fb_state[0].width = buffer->textures.individual.intermediate->width0;
606   buffer->fb_state[0].height = buffer->textures.individual.intermediate->height0;
607
608   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
609   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
610      buffer->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
611         idct->pipe->screen, buffer->textures.individual.intermediate, 0, 0, i,
612         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
613   }
614
615   buffer->fb_state[1].width = buffer->destination->width0;
616   buffer->fb_state[1].height = buffer->destination->height0;
617
618   buffer->fb_state[1].nr_cbufs = 1;
619   buffer->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
620      idct->pipe->screen, buffer->destination, 0, 0, 0,
621      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
622
623   for(i = 0; i < 2; ++i) {
624      buffer->viewport[i].scale[2] = 1;
625      buffer->viewport[i].scale[3] = 1;
626      buffer->viewport[i].translate[0] = 0;
627      buffer->viewport[i].translate[1] = 0;
628      buffer->viewport[i].translate[2] = 0;
629      buffer->viewport[i].translate[3] = 0;
630
631      buffer->fb_state[i].zsbuf = NULL;
632   }
633
634   return true;
635}
636
637void
638vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
639{
640   unsigned i;
641
642   assert(buffer);
643
644   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
645      idct->pipe->screen->tex_surface_destroy(buffer->fb_state[0].cbufs[i]);
646   }
647
648   idct->pipe->screen->tex_surface_destroy(buffer->fb_state[1].cbufs[0]);
649
650   cleanup_textures(idct, buffer);
651   cleanup_vertex_buffers(idct, buffer);
652}
653
654void
655vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
656{
657   assert(idct);
658
659   struct pipe_box rect =
660   {
661      0, 0, 0,
662      buffer->textures.individual.source->width0,
663      buffer->textures.individual.source->height0,
664      1
665   };
666
667   buffer->tex_transfer = idct->pipe->get_transfer
668   (
669      idct->pipe, buffer->textures.individual.source,
670      u_subresource(0, 0),
671      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
672      &rect
673   );
674
675   buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
676
677   vl_vb_map(&buffer->blocks, idct->pipe);
678}
679
680void
681vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
682{
683   struct vertex2f v;
684   unsigned tex_pitch;
685   short *texels;
686
687   unsigned i;
688
689   assert(buffer);
690
691   tex_pitch = buffer->tex_transfer->stride / sizeof(short);
692   texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
693
694   for (i = 0; i < BLOCK_HEIGHT; ++i)
695      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
696
697   v.x = x;
698   v.y = y;
699   vl_vb_add_block(&buffer->blocks, (float*)&v);
700}
701
702void
703vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
704{
705   assert(idct && buffer);
706
707   idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer);
708   idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer);
709   vl_vb_unmap(&buffer->blocks, idct->pipe);
710}
711
712void
713vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
714{
715   unsigned num_verts;
716
717   assert(idct);
718
719   num_verts = vl_vb_restart(&buffer->blocks);
720
721   if(num_verts > 0) {
722
723      idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
724      idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all);
725      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
726      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
727
728      /* first stage */
729      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
730      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
731      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
732      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
733      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
734      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
735
736      /* second stage */
737      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
738      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
739      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
740      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
741      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
742      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
743   }
744}
745