vl_idct.c revision 4abe7382882a451a7750ccc451b8568768d122cb
1bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/************************************************************************** 2bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 3bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Copyright 2010 Christian König 4bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * All Rights Reserved. 5bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 6bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Permission is hereby granted, free of charge, to any person obtaining a 7bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * copy of this software and associated documentation files (the 8bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * "Software"), to deal in the Software without restriction, including 9bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * without limitation the rights to use, copy, modify, merge, publish, 10bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * distribute, sub license, and/or sell copies of the Software, and to 11bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * permit persons to whom the Software is furnished to do so, subject to 12bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * the following conditions: 13bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 14bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * The above copyright notice and this permission notice (including the 15bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * next paragraph) shall be included in all copies or substantial portions 16bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * of the Software. 17bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 18bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 26bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea **************************************************************************/ 27bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 28bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "vl_idct.h" 29bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "vl_vertex_buffers.h" 30bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "util/u_draw.h" 31bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <assert.h> 32bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <pipe/p_context.h> 33bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <pipe/p_screen.h> 34bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <util/u_inlines.h> 35bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <util/u_sampler.h> 36bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <util/u_format.h> 37bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include <tgsi/tgsi_ureg.h> 38bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "vl_types.h" 39bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 40bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#define BLOCK_WIDTH 8 41bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#define BLOCK_HEIGHT 8 42bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 43bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) 44bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 45bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#define STAGE1_SCALE 4.0f 46bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE) 47bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 48bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#define NR_RENDER_TARGETS 1 49bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 50bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastruct vertex_shader_consts 51bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 52bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct vertex4f norm; 53bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea}; 54bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 55bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleaenum VS_INPUT 56bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 57bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea VS_I_RECT, 58bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea VS_I_VPOS, 59bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 60bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea NUM_VS_INPUTS 61bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea}; 62bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 63bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleaenum VS_OUTPUT 64bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 65bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea VS_O_VPOS, 66bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea VS_O_BLOCK, 67bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea VS_O_TEX, 68bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea VS_O_START 69bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea}; 70bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 71bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic const float const_matrix[8][8] = { 72bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f }, 73bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f }, 74bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f }, 75bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f }, 76bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f }, 77bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f }, 78bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f }, 79bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f } 80bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea}; 81bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 82bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void * 83bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleacreate_vert_shader(struct vl_idct *idct, bool calc_src_cords) 84bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 85bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_program *shader; 86bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src scale; 87bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src vrect, vpos; 88bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst t_vpos; 89bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst o_vpos, o_block, o_tex, o_start; 90bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 91bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shader = ureg_create(TGSI_PROCESSOR_VERTEX); 92bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if (!shader) 93bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return NULL; 94bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 95bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea t_vpos = ureg_DECL_temporary(shader); 96bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 97bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 98bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 99bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 100bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 101bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 102bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 103bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) 104bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 105bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * t_vpos = vpos + vrect 106bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * o_vpos.xy = t_vpos * scale 107bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * o_vpos.zw = vpos 108bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 109bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * o_block = vrect 110bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * o_tex = t_pos 111bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * o_start = vpos * scale 112bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 113bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 114bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea scale = ureg_imm2f(shader, 115bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea (float)BLOCK_WIDTH / idct->destination->width0, 116bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea (float)BLOCK_HEIGHT / idct->destination->height0); 117bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 118bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect); 119bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale); 120bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); 121bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); 122bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 123bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if(calc_src_cords) { 124bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK); 125bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX); 126bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START); 127bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 128bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect); 129bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); 130bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale); 131bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 132bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 133bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, t_vpos); 134bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 135bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_END(shader); 136bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 137bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return ureg_create_shader_and_destroy(shader, idct->pipe); 138bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 139bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 140bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void 141bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleafetch_one(struct ureg_program *shader, struct ureg_dst m[2], 142bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src tc, struct ureg_src sampler, 143bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src start, struct ureg_src block, float height) 144bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 145bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst t_tc, tmp; 146bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea unsigned i, j; 147bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 148bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea t_tc = ureg_DECL_temporary(shader); 149bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp = ureg_DECL_temporary(shader); 150bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 151bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea m[0] = ureg_DECL_temporary(shader); 152bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea m[1] = ureg_DECL_temporary(shader); 153bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 154bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 155bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * t_tc.x = right_side ? start.x : tc.x 156bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * t_tc.y = right_side ? tc.y : start.y 157bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * m[0..1].xyzw = tex(t_tc++, sampler) 158bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 159bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X)); 160bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y)); 161bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 162bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#if NR_RENDER_TARGETS == 8 163bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X)); 164bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#else 165bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f)); 166bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#endif 167bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 168bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea for(i = 0; i < 2; ++i) { 169bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea for(j = 0; j < 4; ++j) { 170bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */ 171bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler); 172bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 173bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 174bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if(i != 1 || j != 3) /* skip the last add */ 175bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 176bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_src(t_tc), ureg_imm1f(shader, 1.0f / height)); 177bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 178bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 179bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 180bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, t_tc); 181bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, tmp); 182bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 183bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 184bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void 185bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleafetch_four(struct ureg_program *shader, struct ureg_dst m[2], 186bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src tc, struct ureg_src sampler, 187bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src start, bool right_side, float width) 188bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 189bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst t_tc; 190bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 191bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea t_tc = ureg_DECL_temporary(shader); 192bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea m[0] = ureg_DECL_temporary(shader); 193bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea m[1] = ureg_DECL_temporary(shader); 194bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 195bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 196bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * t_tc.x = right_side ? start.x : tc.x 197bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * t_tc.y = right_side ? tc.y : start.y 198bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * m[0..1] = tex(t_tc++, sampler) 199bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 200bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if(right_side) { 201bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y)); 202bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X)); 203bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } else { 204bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X)); 205bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y)); 206bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 207bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 208bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler); 209bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width)); 210bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler); 211bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 212bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, t_tc); 213bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 214bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 215bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void 216bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleamatrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) 217bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 218bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst tmp[2]; 219bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea unsigned i; 220bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 221bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea for(i = 0; i < 2; ++i) { 222bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp[i] = ureg_DECL_temporary(shader); 223bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 224bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 225bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 226bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * tmp[0..1] = dot4(m[0][0..1], m[1][0..1]) 227bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * dst = tmp[0] + tmp[1] 228bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 229bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); 230bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1])); 231bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1])); 232bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 233bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea for(i = 0; i < 2; ++i) { 234bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, tmp[i]); 235bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 236bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 237bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 238bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void * 239bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleacreate_transpose_frag_shader(struct vl_idct *idct) 240bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 241bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct pipe_resource *transpose = idct->textures.individual.transpose; 242bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct pipe_resource *intermediate = idct->textures.individual.intermediate; 243bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 244bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_program *shader; 245bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 246bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src block, tex, sampler[2]; 247bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src start[2]; 248bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 249bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst m[2][2]; 250bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst tmp, fragment; 251bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 252bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 253bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if (!shader) 254bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return NULL; 255bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 256bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR); 257bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT); 258bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 259bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea sampler[0] = ureg_DECL_sampler(shader, 0); 260bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea sampler[1] = ureg_DECL_sampler(shader, 1); 261bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 262bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea start[0] = ureg_imm1f(shader, 0.0f); 263bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT); 264bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 265bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fetch_four(shader, m[0], block, sampler[0], start[0], false, transpose->width0); 266bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fetch_one(shader, m[1], tex, sampler[1], start[1], block, intermediate->height0); 267bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 268bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 269bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 270bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp = ureg_DECL_temporary(shader); 271bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]); 272bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE)); 273bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 274bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, tmp); 275bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, m[0][0]); 276bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, m[0][1]); 277bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, m[1][0]); 278bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, m[1][1]); 279bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 280bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_END(shader); 281bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 282bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return ureg_create_shader_and_destroy(shader, idct->pipe); 283bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 284bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 285bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void * 286bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleacreate_matrix_frag_shader(struct vl_idct *idct) 287bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 288bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct pipe_resource *matrix = idct->textures.individual.matrix; 289bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct pipe_resource *source = idct->textures.individual.source; 290bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 291bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_program *shader; 292bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 293bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src tc[2], sampler[2]; 294bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_src start[2]; 295bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 296bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst l[2], r[2]; 297bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst t_tc, tmp, fragment[NR_RENDER_TARGETS]; 298bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 299bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea unsigned i; 300bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 301bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 302bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if (!shader) 303bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return NULL; 304bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 305bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea t_tc = ureg_DECL_temporary(shader); 306bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp = ureg_DECL_temporary(shader); 307bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 308bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR); 309bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR); 310bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 311bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea sampler[0] = ureg_DECL_sampler(shader, 1); 312bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea sampler[1] = ureg_DECL_sampler(shader, 0); 313bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 314bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT); 315bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea start[1] = ureg_imm1f(shader, 0.0f); 316bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 317bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea for (i = 0; i < NR_RENDER_TARGETS; ++i) 318bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); 319bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 320bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0); 321bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X)); 322bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X)); 323bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 324bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea for (i = 0; i < NR_RENDER_TARGETS; ++i) { 325bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 326bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#if NR_RENDER_TARGETS == 8 327bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i)); 328bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0); 329bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#elif NR_RENDER_TARGETS == 1 330bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0); 331bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#else 332bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#error invalid number of render targets 333bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#endif 334bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 335bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea matrix_mul(shader, fragment[i], l, r); 336bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, r[0]); 337bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, r[1]); 338bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 339bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 340bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, t_tc); 341bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, tmp); 342bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, l[0]); 343bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_release_temporary(shader, l[1]); 344bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 345bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_END(shader); 346bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 347bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return ureg_create_shader_and_destroy(shader, idct->pipe); 348bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 349bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 350bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void * 351bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleacreate_empty_block_frag_shader(struct vl_idct *idct) 352bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 353bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_program *shader; 354bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct ureg_dst fragment; 355bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 356bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 357bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if (!shader) 358bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return NULL; 359bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 360bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 361bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 362bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f)); 363bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 364bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea ureg_END(shader); 365bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 366bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return ureg_create_shader_and_destroy(shader, idct->pipe); 367bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 368bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 369bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic bool 370bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleainit_shaders(struct vl_idct *idct) 371bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 372bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->matrix_vs = create_vert_shader(idct, true); 373bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->matrix_fs = create_matrix_frag_shader(idct); 374bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 375bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->transpose_vs = create_vert_shader(idct, true); 376bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->transpose_fs = create_transpose_frag_shader(idct); 377bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 378bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->eb_vs = create_vert_shader(idct, false); 379bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->eb_fs = create_empty_block_frag_shader(idct); 380bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 381bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea return 382bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->transpose_vs != NULL && idct->transpose_fs != NULL && 383bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->matrix_vs != NULL && idct->matrix_fs != NULL && 384bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->eb_vs != NULL && idct->eb_fs != NULL; 385bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 386bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 387bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void 388bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleacleanup_shaders(struct vl_idct *idct) 389bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 390bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 391 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); 392 393 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 394 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 395 396 idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs); 397 idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs); 398} 399 400static bool 401init_buffers(struct vl_idct *idct) 402{ 403 struct pipe_resource template; 404 struct pipe_sampler_view sampler_view; 405 struct pipe_vertex_element vertex_elems[2]; 406 unsigned i; 407 408 memset(&template, 0, sizeof(struct pipe_resource)); 409 template.last_level = 0; 410 template.depth0 = 1; 411 template.bind = PIPE_BIND_SAMPLER_VIEW; 412 template.flags = 0; 413 414 template.target = PIPE_TEXTURE_2D; 415 template.format = PIPE_FORMAT_R16G16B16A16_SNORM; 416 template.width0 = idct->destination->width0 / 4; 417 template.height0 = idct->destination->height0; 418 template.depth0 = 1; 419 template.usage = PIPE_USAGE_STREAM; 420 idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template); 421 422 template.target = PIPE_TEXTURE_3D; 423 template.format = PIPE_FORMAT_R16_SNORM; 424 template.width0 = idct->destination->width0 / NR_RENDER_TARGETS; 425 template.depth0 = NR_RENDER_TARGETS; 426 template.usage = PIPE_USAGE_STATIC; 427 idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template); 428 429 for (i = 0; i < 4; ++i) { 430 if(idct->textures.all[i] == NULL) 431 return false; /* a texture failed to allocate */ 432 433 u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format); 434 idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view); 435 } 436 437 idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks); 438 439 if(idct->vertex_bufs.individual.quad.buffer == NULL) 440 return false; 441 442 idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f); 443 idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1; 444 idct->vertex_bufs.individual.pos.buffer_offset = 0; 445 idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create 446 ( 447 idct->pipe->screen, 448 PIPE_BIND_VERTEX_BUFFER, 449 sizeof(struct vertex2f) * 4 * idct->max_blocks 450 ); 451 452 if(idct->vertex_bufs.individual.pos.buffer == NULL) 453 return false; 454 455 /* Rect element */ 456 vertex_elems[0].src_offset = 0; 457 vertex_elems[0].instance_divisor = 0; 458 vertex_elems[0].vertex_buffer_index = 0; 459 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; 460 461 /* Pos element */ 462 vertex_elems[1].src_offset = 0; 463 vertex_elems[1].instance_divisor = 0; 464 vertex_elems[1].vertex_buffer_index = 1; 465 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; 466 467 idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems); 468 469 return true; 470} 471 472static void 473cleanup_buffers(struct vl_idct *idct) 474{ 475 unsigned i; 476 477 assert(idct); 478 479 for (i = 0; i < 4; ++i) { 480 pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL); 481 pipe_resource_reference(&idct->textures.all[i], NULL); 482 } 483 484 idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 485 pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL); 486 pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL); 487} 488 489static void 490init_state(struct vl_idct *idct) 491{ 492 struct pipe_sampler_state sampler; 493 unsigned i; 494 495 idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0; 496 idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0; 497 498 idct->viewport[1].scale[0] = idct->destination->width0; 499 idct->viewport[1].scale[1] = idct->destination->height0; 500 501 idct->fb_state[0].width = idct->textures.individual.intermediate->width0; 502 idct->fb_state[0].height = idct->textures.individual.intermediate->height0; 503 504 idct->fb_state[0].nr_cbufs = NR_RENDER_TARGETS; 505 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 506 idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface( 507 idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i, 508 PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET); 509 } 510 511 idct->fb_state[1].width = idct->destination->width0; 512 idct->fb_state[1].height = idct->destination->height0; 513 514 idct->fb_state[1].nr_cbufs = 1; 515 idct->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface( 516 idct->pipe->screen, idct->destination, 0, 0, 0, 517 PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET); 518 519 for(i = 0; i < 2; ++i) { 520 idct->viewport[i].scale[2] = 1; 521 idct->viewport[i].scale[3] = 1; 522 idct->viewport[i].translate[0] = 0; 523 idct->viewport[i].translate[1] = 0; 524 idct->viewport[i].translate[2] = 0; 525 idct->viewport[i].translate[3] = 0; 526 527 idct->fb_state[i].zsbuf = NULL; 528 } 529 530 for (i = 0; i < 4; ++i) { 531 memset(&sampler, 0, sizeof(sampler)); 532 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 533 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 534 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 535 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 536 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 537 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 538 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 539 sampler.compare_func = PIPE_FUNC_ALWAYS; 540 sampler.normalized_coords = 1; 541 /*sampler.shadow_ambient = ; */ 542 /*sampler.lod_bias = ; */ 543 sampler.min_lod = 0; 544 /*sampler.max_lod = ; */ 545 /*sampler.border_color[0] = ; */ 546 /*sampler.max_anisotropy = ; */ 547 idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 548 } 549} 550 551static void 552cleanup_state(struct vl_idct *idct) 553{ 554 unsigned i; 555 556 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 557 idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]); 558 } 559 560 idct->pipe->screen->tex_surface_destroy(idct->fb_state[1].cbufs[0]); 561 562 for (i = 0; i < 4; ++i) 563 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]); 564} 565 566struct pipe_resource * 567vl_idct_upload_matrix(struct pipe_context *pipe) 568{ 569 struct pipe_resource template, *matrix; 570 struct pipe_transfer *buf_transfer; 571 unsigned i, j, pitch; 572 float *f; 573 574 struct pipe_box rect = 575 { 576 0, 0, 0, 577 BLOCK_WIDTH, 578 BLOCK_HEIGHT, 579 1 580 }; 581 582 memset(&template, 0, sizeof(struct pipe_resource)); 583 template.target = PIPE_TEXTURE_2D; 584 template.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 585 template.last_level = 0; 586 template.width0 = 2; 587 template.height0 = 8; 588 template.depth0 = 1; 589 template.usage = PIPE_USAGE_IMMUTABLE; 590 template.bind = PIPE_BIND_SAMPLER_VIEW; 591 template.flags = 0; 592 593 matrix = pipe->screen->resource_create(pipe->screen, &template); 594 595 /* matrix */ 596 buf_transfer = pipe->get_transfer 597 ( 598 pipe, matrix, 599 u_subresource(0, 0), 600 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 601 &rect 602 ); 603 pitch = buf_transfer->stride / sizeof(float); 604 605 f = pipe->transfer_map(pipe, buf_transfer); 606 for(i = 0; i < BLOCK_HEIGHT; ++i) 607 for(j = 0; j < BLOCK_WIDTH; ++j) 608 f[i * pitch + j] = const_matrix[j][i]; // transpose 609 610 pipe->transfer_unmap(pipe, buf_transfer); 611 pipe->transfer_destroy(pipe, buf_transfer); 612 613 return matrix; 614} 615 616static void 617xfer_buffers_map(struct vl_idct *idct) 618{ 619 struct pipe_box rect = 620 { 621 0, 0, 0, 622 idct->textures.individual.source->width0, 623 idct->textures.individual.source->height0, 624 1 625 }; 626 627 idct->tex_transfer = idct->pipe->get_transfer 628 ( 629 idct->pipe, idct->textures.individual.source, 630 u_subresource(0, 0), 631 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 632 &rect 633 ); 634 635 idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer); 636} 637 638static void 639xfer_buffers_unmap(struct vl_idct *idct) 640{ 641 idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer); 642 idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer); 643} 644 645bool 646vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix) 647{ 648 assert(idct && pipe && dst); 649 650 idct->pipe = pipe; 651 pipe_resource_reference(&idct->textures.individual.matrix, matrix); 652 pipe_resource_reference(&idct->textures.individual.transpose, matrix); 653 pipe_resource_reference(&idct->destination, dst); 654 655 idct->max_blocks = 656 align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH * 657 align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT * 658 idct->destination->depth0; 659 660 if(!init_buffers(idct)) 661 return false; 662 663 if(!init_shaders(idct)) { 664 cleanup_buffers(idct); 665 return false; 666 } 667 668 if(!vl_vb_init(&idct->blocks, idct->max_blocks)) { 669 cleanup_shaders(idct); 670 cleanup_buffers(idct); 671 return false; 672 } 673 674 if(!vl_vb_init(&idct->empty_blocks, idct->max_blocks)) { 675 vl_vb_cleanup(&idct->blocks); 676 cleanup_shaders(idct); 677 cleanup_buffers(idct); 678 return false; 679 } 680 681 init_state(idct); 682 683 xfer_buffers_map(idct); 684 685 return true; 686} 687 688void 689vl_idct_cleanup(struct vl_idct *idct) 690{ 691 vl_vb_cleanup(&idct->blocks); 692 vl_vb_cleanup(&idct->empty_blocks); 693 cleanup_shaders(idct); 694 cleanup_buffers(idct); 695 696 cleanup_state(idct); 697 698 pipe_resource_reference(&idct->destination, NULL); 699} 700 701void 702vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block) 703{ 704 unsigned tex_pitch; 705 short *texels; 706 707 unsigned i; 708 709 assert(idct); 710 711 if(block) { 712 tex_pitch = idct->tex_transfer->stride / sizeof(short); 713 texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; 714 715 for (i = 0; i < BLOCK_HEIGHT; ++i) 716 memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short)); 717 718 vl_vb_add_block(&idct->blocks, false, x, y); 719 } else { 720 721 vl_vb_add_block(&idct->empty_blocks, true, x, y); 722 } 723} 724 725void 726vl_idct_flush(struct vl_idct *idct) 727{ 728 struct pipe_transfer *vec_transfer; 729 struct quadf *vectors; 730 unsigned num_blocks, num_empty_blocks; 731 732 assert(idct); 733 734 vectors = pipe_buffer_map 735 ( 736 idct->pipe, 737 idct->vertex_bufs.individual.pos.buffer, 738 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 739 &vec_transfer 740 ); 741 742 num_blocks = vl_vb_upload(&idct->blocks, vectors); 743 num_empty_blocks = vl_vb_upload(&idct->empty_blocks, vectors + num_blocks); 744 745 pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer); 746 747 xfer_buffers_unmap(idct); 748 749 if(num_blocks > 0) { 750 751 /* first stage */ 752 idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]); 753 idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]); 754 755 idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); 756 idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 757 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]); 758 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]); 759 idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); 760 idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); 761 762 util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4); 763 764 /* second stage */ 765 idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]); 766 idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]); 767 768 idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); 769 idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 770 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]); 771 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]); 772 idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); 773 idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); 774 775 util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4); 776 } 777 778 if(num_empty_blocks > 0) { 779 780 /* empty block handling */ 781 idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]); 782 idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]); 783 784 idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); 785 idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 786 idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs); 787 idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs); 788 789 util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, num_blocks * 4, num_empty_blocks * 4); 790 } 791 792 xfer_buffers_map(idct); 793} 794