vl_idct.c revision 9d2e630cd02362bfa8f090640a55cf2dea9d64b3
/**************************************************************************
 *
 * Copyright 2010 Christian König
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "vl_idct.h"
#include "vl_vertex_buffers.h"
#include "vl_defines.h"
#include "util/u_draw.h"
#include <assert.h>
#include <pipe/p_context.h>
#include <pipe/p_screen.h>
#include <util/u_inlines.h>
#include <util/u_sampler.h>
#include <util/u_format.h>
#include <tgsi/tgsi_ureg.h>
#include "vl_types.h"

/* vl_idct_upload_matrix() multiplies every coefficient by sqrtf() of this. */
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

/*
 * Number of colour buffers written by the first (matrix) pass; also the
 * depth of the intermediate 3D texture created in init_intermediate().
 */
#define NR_RENDER_TARGETS 4

/* Output slots of the vertex shader, reused as fragment shader input slots. */
enum VS_OUTPUT
{
   VS_O_VPOS,
   VS_O_L_ADDR0,
   VS_O_L_ADDR1,
   VS_O_R_ADDR0,
   VS_O_R_ADDR1
};

/*
 * 8x8 coefficient table; vl_idct_upload_matrix() writes it transposed and
 * scaled into a texture.
 * NOTE(review): the values look like the 8-point DCT basis - confirm.
 */
static const float const_matrix[8][8] = {
   { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f },
   { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
   { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f },
   { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f },
   { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f },
   { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f },
   { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f },
   { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f }
};

/*
 * Emit the instructions that fill a pair of texture addresses.
 *
 * shader      program being built
 * addr        the two destination registers to write
 * tc          source of the "tc" component and of the z component
 * start       source of the "start" component
 * right_side  selects which source components are read (see below)
 * transposed  together with right_side selects which destination
 *             components (x or y) receive the "start" and "tc" values
 * size        addr[1] is offset from addr[0] by 1.0f / size
 */
static void
calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
          struct ureg_src tc, struct ureg_src start, bool right_side,
          bool transposed, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;

   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;

   /*
    * addr[0..1].(wm_start) = start.(right_side ? y : x)
    * addr[0..1].(wm_tc)    = tc.(right_side ? x : y)
    * addr[0..1].z          = tc.z
    * addr[1].(wm_start)   += 1.0f / size
    */
   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
   ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);

   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
   ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc);
}

/*
 * Build the vertex shader shared by both passes.
 *
 * matrix_stage selects which calc_addr() argument set is used for the
 * left/right texture addresses (first pass when true, second pass when
 * false).
 *
 * Returns the shader object produced by ureg_create_shader_and_destroy(),
 * or NULL if the ureg program could not be created.
 */
static void *
create_vert_shader(struct vl_idct *idct, bool matrix_stage)
{
   struct ureg_program *shader;
   struct ureg_src vrect, vpos, vblock, eb;
   struct ureg_src scale, blocks_xy;
   struct ureg_dst t_tex, t_start;
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   t_tex = ureg_DECL_temporary(shader);
   t_start = ureg_DECL_temporary(shader);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
   /* vblock.xy is taken from the z/w components of the rect input */
   vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   eb = ureg_DECL_vs_input(shader, VS_I_EB);

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);

   /*
    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
    * blocks_xy = (blocks_x, blocks_y)
    *
    * if eb.(vblock.y, vblock.x)
    *    o_vpos.xy = -1
    * else
    *    t_tex = vpos * blocks_xy + vblock
    *    t_start = t_tex * scale
    *    t_tex = t_tex + vrect
    *    o_vpos.xy = t_tex * scale
    *
    *    o_l_addr = calc_addr(...)
    *    o_r_addr = calc_addr(...)
    * endif
    * o_vpos.zw = vpos
    *
    */

   scale = ureg_imm2f(shader,
      (float)BLOCK_WIDTH / idct->buffer_width,
      (float)BLOCK_HEIGHT / idct->buffer_height);

   blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);

   /*
    * With more than one block per vertex group, pick the eb component that
    * belongs to this block: first choose the zw or xy pair by vblock.y,
    * then choose y or x of that pair by vblock.x.
    */
   if (idct->blocks_x > 1 || idct->blocks_y > 1) {
      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
               ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
               ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
               ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));

      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
               ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
               ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
               ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));

      eb = ureg_src(t_tex);
   }

   ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);

      /* flagged block: all position components are set to -1 */
      ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ELSE(shader, &label);

      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
      ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);

      ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);

      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
      /* t_tex.z = vrect.x * (BLOCK_WIDTH / NR_RENDER_TARGETS); integer division */
      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
               ureg_scalar(vrect, TGSI_SWIZZLE_X),
               ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));

      ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));

      if(matrix_stage) {
         calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
         calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
      } else {
         calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
         calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
      }

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ENDIF(shader);

   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_start);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

/*
 * Emit instructions that copy an address pair while offsetting one of its
 * components by pos / size.
 *
 * The x/y component selection follows the same right_side/transposed rule
 * as calc_addr(). Only x and y of daddr are written here; z is left as is.
 */
static void
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
               struct ureg_src saddr[2], bool right_side, bool transposed,
               int pos, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;

   /*
    * daddr[0..1].(start) = saddr[0..1].(start)
    * daddr[0..1].(tc)    = saddr[0..1].(tc) + pos / size
    */

   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
}

/*
 * Emit two 3D texture fetches: m[0] = TEX(addr[0]), m[1] = TEX(addr[1]),
 * both from the given sampler.
 */
static void
fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
{
   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
}

/*
 * Emit dst = dot4(l[0], r[0]) + dot4(l[1], r[1]), i.e. an eight element
 * dot product held in two vec4 pairs. Uses one scratch temporary that is
 * released again before returning.
 */
static void
matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
{
   struct ureg_dst tmp;

   tmp = ureg_DECL_temporary(shader);

   /*
    * tmp.x = dot4(l[0], r[0])
    * tmp.y = dot4(l[1], r[1])
    * dst = tmp.x + tmp.y
    */
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
   ureg_ADD(shader, dst,
      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));

   ureg_release_temporary(shader, tmp);
}

/*
 * Build the fragment shader of the first pass.
 *
 * It fetches four "left" operand pairs from sampler 1 and, for each of the
 * NR_RENDER_TARGETS colour outputs, one "right" operand pair from sampler 0;
 * component j of output i receives matrix_mul(l[j], r).
 *
 * Returns the shader object, or NULL if the ureg program could not be
 * created.
 */
static void *
create_matrix_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[4][2], r[2];
   struct ureg_dst fragment[NR_RENDER_TARGETS];

   unsigned i, j;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < NR_RENDER_TARGETS; ++i)
       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);

   for (i = 0; i < 4; ++i) {
      l[i][0] = ureg_DECL_temporary(shader);
      l[i][1] = ureg_DECL_temporary(shader);
   }

   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   /* l[1..3] first hold the incremented addresses, then the fetched data */
   for (i = 1; i < 4; ++i) {
      increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
   }

   for (i = 0; i < 4; ++i) {
      struct ureg_src s_addr[2];
      /* index 0 reads the interpolated address directly */
      s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
      s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
   }

   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
      if(i > 0)
         increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);

      /* the initializer is overwritten by the two assignments below */
      struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
      s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
      s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));

      /* one output component per left operand pair */
      for (j = 0; j < 4; ++j) {
         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
      }
   }

   for (i = 0; i < 4; ++i) {
      ureg_release_temporary(shader, l[i][0]);
      ureg_release_temporary(shader, l[i][1]);
   }
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

/*
 * Build the fragment shader of the second pass.
 *
 * It fetches one "left" pair from sampler 0 and one "right" pair from
 * sampler 1 and writes matrix_mul(l, r) to the x component of colour
 * output 0.
 *
 * Returns the shader object, or NULL if the ureg program could not be
 * created.
 */
static void *
create_transpose_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[2], r[2];
   struct ureg_dst fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   l[0] = ureg_DECL_temporary(shader);
   l[1] = ureg_DECL_temporary(shader);
   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);

   ureg_release_temporary(shader, l[0]);
   ureg_release_temporary(shader, l[1]);
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
358 359static bool 360init_shaders(struct vl_idct *idct) 361{ 362 idct->matrix_vs = create_vert_shader(idct, true); 363 if (!idct->matrix_vs) 364 goto error_matrix_vs; 365 366 idct->matrix_fs = create_matrix_frag_shader(idct); 367 if (!idct->matrix_fs) 368 goto error_matrix_fs; 369 370 idct->transpose_vs = create_vert_shader(idct, false); 371 if (!idct->transpose_vs) 372 goto error_transpose_vs; 373 374 idct->transpose_fs = create_transpose_frag_shader(idct); 375 if (!idct->transpose_fs) 376 goto error_transpose_fs; 377 378 return true; 379 380error_transpose_fs: 381 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 382 383error_transpose_vs: 384 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 385 386error_matrix_fs: 387 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 388 389error_matrix_vs: 390 return false; 391} 392 393static void 394cleanup_shaders(struct vl_idct *idct) 395{ 396 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 397 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 398 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 399 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); 400} 401 402static bool 403init_state(struct vl_idct *idct) 404{ 405 struct pipe_sampler_state sampler; 406 struct pipe_rasterizer_state rs_state; 407 unsigned i; 408 409 assert(idct); 410 411 memset(&rs_state, 0, sizeof(rs_state)); 412 rs_state.gl_rasterization_rules = false; 413 idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); 414 if (!idct->rs_state) 415 goto error_rs_state; 416 417 for (i = 0; i < 2; ++i) { 418 memset(&sampler, 0, sizeof(sampler)); 419 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 420 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 421 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 422 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 423 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 424 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 425 sampler.compare_mode = 
PIPE_TEX_COMPARE_NONE; 426 sampler.compare_func = PIPE_FUNC_ALWAYS; 427 sampler.normalized_coords = 1; 428 idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 429 if (!idct->samplers[i]) 430 goto error_samplers; 431 } 432 433 return true; 434 435error_samplers: 436 for (i = 0; i < 2; ++i) 437 if (idct->samplers[i]) 438 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 439 440 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 441 442error_rs_state: 443 return false; 444} 445 446static void 447cleanup_state(struct vl_idct *idct) 448{ 449 unsigned i; 450 451 for (i = 0; i < 2; ++i) 452 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 453 454 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 455} 456 457static bool 458init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 459{ 460 struct pipe_resource tex_templ, *tex; 461 struct pipe_sampler_view sv_templ; 462 struct pipe_surface surf_templ; 463 unsigned i; 464 465 assert(idct && buffer); 466 467 memset(&tex_templ, 0, sizeof(tex_templ)); 468 tex_templ.target = PIPE_TEXTURE_3D; 469 tex_templ.format = PIPE_FORMAT_R16G16B16A16_SNORM; 470 tex_templ.width0 = idct->buffer_width / NR_RENDER_TARGETS; 471 tex_templ.height0 = idct->buffer_height / 4; 472 tex_templ.depth0 = NR_RENDER_TARGETS; 473 tex_templ.array_size = 1; 474 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; 475 tex_templ.usage = PIPE_USAGE_STATIC; 476 477 tex = idct->pipe->screen->resource_create(idct->pipe->screen, &tex_templ); 478 if (!tex) 479 goto error_tex; 480 481 memset(&sv_templ, 0, sizeof(sv_templ)); 482 u_sampler_view_default_template(&sv_templ, tex, tex->format); 483 buffer->sampler_views.individual.intermediate = 484 idct->pipe->create_sampler_view(idct->pipe, tex, &sv_templ); 485 if (!buffer->sampler_views.individual.intermediate) 486 goto error_sampler_view; 487 488 buffer->fb_state[0].width = tex->width0; 489 
buffer->fb_state[0].height = tex->height0; 490 buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS; 491 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 492 memset(&surf_templ, 0, sizeof(surf_templ)); 493 surf_templ.format = tex->format; 494 surf_templ.u.tex.first_layer = i; 495 surf_templ.u.tex.last_layer = i; 496 surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; 497 buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface( 498 idct->pipe, tex, &surf_templ); 499 500 if (!buffer->fb_state[0].cbufs[i]) 501 goto error_surfaces; 502 } 503 504 buffer->viewport[0].scale[0] = tex->width0; 505 buffer->viewport[0].scale[1] = tex->height0; 506 507 pipe_resource_reference(&tex, NULL); 508 return true; 509 510error_surfaces: 511 for(i = 0; i < NR_RENDER_TARGETS; ++i) 512 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); 513 514 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); 515 516error_sampler_view: 517 pipe_resource_reference(&tex, NULL); 518 519error_tex: 520 return false; 521} 522 523static void 524cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 525{ 526 unsigned i; 527 528 assert(idct && buffer); 529 530 for(i = 0; i < NR_RENDER_TARGETS; ++i) 531 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); 532 533 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); 534} 535 536struct pipe_sampler_view * 537vl_idct_upload_matrix(struct pipe_context *pipe) 538{ 539 const float scale = sqrtf(SCALE_FACTOR_16_TO_9); 540 541 struct pipe_resource tex_templ, *matrix; 542 struct pipe_sampler_view sv_templ, *sv; 543 struct pipe_transfer *buf_transfer; 544 unsigned i, j, pitch; 545 float *f; 546 547 struct pipe_box rect = 548 { 549 0, 0, 0, 550 BLOCK_WIDTH / 4, 551 BLOCK_HEIGHT, 552 1 553 }; 554 555 assert(pipe); 556 557 memset(&tex_templ, 0, sizeof(tex_templ)); 558 tex_templ.target = PIPE_TEXTURE_2D; 559 tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 560 
tex_templ.last_level = 0; 561 tex_templ.width0 = 2; 562 tex_templ.height0 = 8; 563 tex_templ.depth0 = 1; 564 tex_templ.array_size = 1; 565 tex_templ.usage = PIPE_USAGE_IMMUTABLE; 566 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; 567 tex_templ.flags = 0; 568 569 matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); 570 if (!matrix) 571 goto error_matrix; 572 573 buf_transfer = pipe->get_transfer 574 ( 575 pipe, matrix, 576 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 577 &rect 578 ); 579 if (!buf_transfer) 580 goto error_transfer; 581 582 pitch = buf_transfer->stride / sizeof(float); 583 584 f = pipe->transfer_map(pipe, buf_transfer); 585 if (!f) 586 goto error_map; 587 588 for(i = 0; i < BLOCK_HEIGHT; ++i) 589 for(j = 0; j < BLOCK_WIDTH; ++j) 590 // transpose and scale 591 f[i * pitch + j] = const_matrix[j][i] * scale; 592 593 pipe->transfer_unmap(pipe, buf_transfer); 594 pipe->transfer_destroy(pipe, buf_transfer); 595 596 memset(&sv_templ, 0, sizeof(sv_templ)); 597 u_sampler_view_default_template(&sv_templ, matrix, matrix->format); 598 sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); 599 pipe_resource_reference(&matrix, NULL); 600 if (!sv) 601 goto error_map; 602 603 return sv; 604 605error_map: 606 pipe->transfer_destroy(pipe, buf_transfer); 607 608error_transfer: 609 pipe_resource_reference(&matrix, NULL); 610 611error_matrix: 612 return NULL; 613} 614 615bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 616 unsigned buffer_width, unsigned buffer_height, 617 unsigned blocks_x, unsigned blocks_y, 618 struct pipe_sampler_view *matrix) 619{ 620 assert(idct && pipe && matrix); 621 622 idct->pipe = pipe; 623 idct->buffer_width = buffer_width; 624 idct->buffer_height = buffer_height; 625 idct->blocks_x = blocks_x; 626 idct->blocks_y = blocks_y; 627 pipe_sampler_view_reference(&idct->matrix, matrix); 628 629 if(!init_shaders(idct)) 630 return false; 631 632 if(!init_state(idct)) { 633 cleanup_shaders(idct); 634 return false; 635 } 636 
637 return true; 638} 639 640void 641vl_idct_cleanup(struct vl_idct *idct) 642{ 643 cleanup_shaders(idct); 644 cleanup_state(idct); 645 646 pipe_sampler_view_reference(&idct->matrix, NULL); 647} 648 649bool 650vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, 651 struct pipe_sampler_view *source, struct pipe_surface *destination) 652{ 653 unsigned i; 654 655 assert(buffer); 656 assert(idct); 657 assert(source); 658 assert(destination); 659 660 pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); 661 pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); 662 pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix); 663 664 if (!init_intermediate(idct, buffer)) 665 return false; 666 667 /* init state */ 668 buffer->fb_state[1].width = destination->texture->width0; 669 buffer->fb_state[1].height = destination->texture->height0; 670 buffer->fb_state[1].nr_cbufs = 1; 671 pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination); 672 673 buffer->viewport[1].scale[0] = destination->texture->width0; 674 buffer->viewport[1].scale[1] = destination->texture->height0; 675 676 for(i = 0; i < 2; ++i) { 677 buffer->viewport[i].scale[2] = 1; 678 buffer->viewport[i].scale[3] = 1; 679 buffer->viewport[i].translate[0] = 0; 680 buffer->viewport[i].translate[1] = 0; 681 buffer->viewport[i].translate[2] = 0; 682 buffer->viewport[i].translate[3] = 0; 683 684 buffer->fb_state[i].zsbuf = NULL; 685 } 686 687 return true; 688} 689 690void 691vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) 692{ 693 unsigned i; 694 695 assert(idct && buffer); 696 697 for(i = 0; i < NR_RENDER_TARGETS; ++i) 698 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); 699 700 pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL); 701 702 cleanup_intermediate(idct, buffer); 703} 704 705void 706vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, 
unsigned num_instances) 707{ 708 unsigned num_verts; 709 710 assert(idct); 711 assert(buffer); 712 713 if(num_instances > 0) { 714 num_verts = idct->blocks_x * idct->blocks_y * 4; 715 716 idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 717 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); 718 719 /* first stage */ 720 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]); 721 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]); 722 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); 723 idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); 724 idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); 725 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); 726 727 /* second stage */ 728 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); 729 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]); 730 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); 731 idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); 732 idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); 733 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); 734 } 735} 736