vl_idct.c revision 59774e5c7a2756c5c430fc74bc80ea75d54f594d
1/************************************************************************** 2 * 3 * Copyright 2010 Christian König 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "vl_idct.h" 29#include "vl_vertex_buffers.h" 30#include "vl_ycbcr_buffer.h" 31#include "vl_defines.h" 32#include "util/u_draw.h" 33#include <assert.h> 34#include <pipe/p_context.h> 35#include <pipe/p_screen.h> 36#include <util/u_inlines.h> 37#include <util/u_sampler.h> 38#include <util/u_format.h> 39#include <tgsi/tgsi_ureg.h> 40#include "vl_types.h" 41 42#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) 43 44#define NR_RENDER_TARGETS 4 45 46enum VS_OUTPUT 47{ 48 VS_O_VPOS, 49 VS_O_L_ADDR0, 50 VS_O_L_ADDR1, 51 VS_O_R_ADDR0, 52 VS_O_R_ADDR1 53}; 54 55static const float const_matrix[8][8] = { 56 { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f }, 57 { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f }, 58 { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f }, 59 { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f }, 60 { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f }, 61 { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f }, 62 { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f }, 63 { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f } 64}; 65 66static void 67calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], 68 struct ureg_src tc, struct ureg_src start, bool right_side, 69 bool transposed, float size) 70{ 71 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 72 unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; 73 74 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 75 unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; 76 77 /* 78 * addr[0..1].(start) = right_side ? start.x : tc.x 79 * addr[0..1].(tc) = right_side ? tc.y : start.y 80 * addr[0..1].z = tc.z 81 * addr[1].(start) += 1.0f / scale 82 */ 83 ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); 84 ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); 85 ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc); 86 87 ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); 88 ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); 89 ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc); 90} 91 92static void * 93create_vert_shader(struct vl_idct *idct, bool matrix_stage) 94{ 95 struct ureg_program *shader; 96 struct ureg_src vrect, vpos, vblock, eb; 97 struct ureg_src scale, blocks_xy; 98 struct ureg_dst t_tex, t_start; 99 struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; 100 unsigned label; 101 102 shader = ureg_create(TGSI_PROCESSOR_VERTEX); 103 if (!shader) 104 return NULL; 105 106 t_tex = ureg_DECL_temporary(shader); 107 t_start = ureg_DECL_temporary(shader); 108 109 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 110 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 111 vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 112 113 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 114 115 eb = ureg_DECL_vs_input(shader, VS_I_EB); 116 117 o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 118 o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 119 120 o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); 121 o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); 122 123 /* 124 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) 125 * blocks_xy = (blocks_x, blocks_y) 126 * 127 * if eb.(vblock.y, vblock.x) 128 * o_vpos.xy = -1 129 * else 130 * t_tex = vpos * blocks_xy + vblock 131 * t_start = t_tex * scale 132 * t_tex = t_tex + vrect 133 * o_vpos.xy = t_tex * scale 134 * 135 * o_l_addr = calc_addr(...) 136 * o_r_addr = calc_addr(...) 137 * endif 138 * o_vpos.zw = vpos 139 * 140 */ 141 142 scale = ureg_imm2f(shader, 143 (float)BLOCK_WIDTH / idct->buffer_width, 144 (float)BLOCK_HEIGHT / idct->buffer_height); 145 146 blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y); 147 148 if (idct->blocks_x > 1 || idct->blocks_y > 1) { 149 ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), 150 ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)), 151 ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W), 152 ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y)); 153 154 ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), 155 ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)), 156 ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y), 157 ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X)); 158 159 eb = ureg_src(t_tex); 160 } 161 162 ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label); 163 164 ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f)); 165 166 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); 167 ureg_ELSE(shader, &label); 168 169 ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock); 170 ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); 171 172 ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect); 173 174 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); 175 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), 176 ureg_scalar(vrect, TGSI_SWIZZLE_X), 177 ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS)); 178 179 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); 180 181 if(matrix_stage) { 182 calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); 183 calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); 184 } else { 185 calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); 186 calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); 187 } 188 189 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); 190 ureg_ENDIF(shader); 191 192 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); 193 194 ureg_release_temporary(shader, t_tex); 195 ureg_release_temporary(shader, t_start); 196 197 ureg_END(shader); 198 199 return ureg_create_shader_and_destroy(shader, idct->pipe); 200} 201 202static void 203increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], 204 struct ureg_src saddr[2], bool right_side, bool transposed, 205 int pos, float size) 206{ 207 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 208 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 209 210 /* 211 * daddr[0..1].(start) = saddr[0..1].(start) 212 * daddr[0..1].(tc) = saddr[0..1].(tc) 213 */ 214 215 ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); 216 ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); 217 ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); 218 ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); 219} 220 221static void 222fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) 223{ 224 ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); 225 ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); 226} 227 228static void 229matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) 230{ 231 struct ureg_dst tmp; 232 233 tmp = ureg_DECL_temporary(shader); 234 235 /* 236 * tmp.xy = dot4(m[0][0..1], m[1][0..1]) 237 * dst = tmp.x + tmp.y 238 */ 239 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); 240 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); 241 ureg_ADD(shader, dst, 242 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), 243 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 244 245 ureg_release_temporary(shader, tmp); 246} 247 248static void * 249create_matrix_frag_shader(struct vl_idct *idct) 250{ 251 struct ureg_program *shader; 252 253 struct ureg_src l_addr[2], r_addr[2]; 254 255 struct ureg_dst l[4][2], r[2]; 256 struct ureg_dst fragment[NR_RENDER_TARGETS]; 257 258 unsigned i, j; 259 260 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 261 if (!shader) 262 return NULL; 263 264 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 265 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 266 267 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 268 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 269 270 for (i = 0; i < NR_RENDER_TARGETS; ++i) 271 fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); 272 273 for (i = 0; i < 4; ++i) { 274 l[i][0] = ureg_DECL_temporary(shader); 275 l[i][1] = ureg_DECL_temporary(shader); 276 } 277 278 r[0] = ureg_DECL_temporary(shader); 279 r[1] = ureg_DECL_temporary(shader); 280 281 for (i = 1; i < 4; ++i) { 282 increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height); 283 } 284 285 for (i = 0; i < 4; ++i) { 286 struct ureg_src s_addr[2]; 287 s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]); 288 s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]); 289 fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1)); 290 } 291 292 for (i = 0; i < NR_RENDER_TARGETS; ++i) { 293 if(i > 0) 294 increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT); 295 296 struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) }; 297 s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]); 298 s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]); 299 fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0)); 300 301 for (j = 0; j < 4; ++j) { 302 matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); 303 } 304 } 305 306 for (i = 0; i < 4; ++i) { 307 ureg_release_temporary(shader, l[i][0]); 308 ureg_release_temporary(shader, l[i][1]); 309 } 310 ureg_release_temporary(shader, r[0]); 311 ureg_release_temporary(shader, r[1]); 312 313 ureg_END(shader); 314 315 return ureg_create_shader_and_destroy(shader, idct->pipe); 316} 317 318static void * 319create_transpose_frag_shader(struct vl_idct *idct) 320{ 321 struct ureg_program *shader; 322 323 struct ureg_src l_addr[2], r_addr[2]; 324 325 struct ureg_dst l[2], r[2]; 326 struct ureg_dst fragment; 327 328 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 329 if (!shader) 330 return NULL; 331 332 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 333 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 334 335 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 336 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 337 338 l[0] = ureg_DECL_temporary(shader); 339 l[1] = ureg_DECL_temporary(shader); 340 r[0] = ureg_DECL_temporary(shader); 341 r[1] = ureg_DECL_temporary(shader); 342 343 fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0)); 344 fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1)); 345 346 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 347 348 matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r); 349 350 ureg_release_temporary(shader, l[0]); 351 ureg_release_temporary(shader, l[1]); 352 ureg_release_temporary(shader, r[0]); 353 ureg_release_temporary(shader, r[1]); 354 355 ureg_END(shader); 356 357 return ureg_create_shader_and_destroy(shader, idct->pipe); 358} 359 360static bool 361init_shaders(struct vl_idct *idct) 362{ 363 idct->matrix_vs = create_vert_shader(idct, true); 364 if (!idct->matrix_vs) 365 goto error_matrix_vs; 366 367 idct->matrix_fs = create_matrix_frag_shader(idct); 368 if (!idct->matrix_fs) 369 goto error_matrix_fs; 370 371 idct->transpose_vs = create_vert_shader(idct, false); 372 if (!idct->transpose_vs) 373 goto error_transpose_vs; 374 375 idct->transpose_fs = create_transpose_frag_shader(idct); 376 if (!idct->transpose_fs) 377 goto error_transpose_fs; 378 379 return true; 380 381error_transpose_fs: 382 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 383 384error_transpose_vs: 385 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 386 387error_matrix_fs: 388 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 389 390error_matrix_vs: 391 return false; 392} 393 394static void 395cleanup_shaders(struct vl_idct *idct) 396{ 397 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 398 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 399 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 400 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); 401} 402 403static bool 404init_state(struct vl_idct *idct) 405{ 406 struct pipe_sampler_state sampler; 407 struct pipe_rasterizer_state rs_state; 408 unsigned i; 409 410 assert(idct); 411 412 memset(&rs_state, 0, sizeof(rs_state)); 413 rs_state.gl_rasterization_rules = false; 414 idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); 415 if (!idct->rs_state) 416 goto error_rs_state; 417 418 for (i = 0; i < 2; ++i) { 419 memset(&sampler, 0, sizeof(sampler)); 420 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 421 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 422 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 423 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 424 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 425 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 426 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 427 sampler.compare_func = PIPE_FUNC_ALWAYS; 428 sampler.normalized_coords = 1; 429 idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 430 if (!idct->samplers[i]) 431 goto error_samplers; 432 } 433 434 return true; 435 436error_samplers: 437 for (i = 0; i < 2; ++i) 438 if (idct->samplers[i]) 439 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 440 441 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 442 443error_rs_state: 444 return false; 445} 446 447static void 448cleanup_state(struct vl_idct *idct) 449{ 450 unsigned i; 451 452 for (i = 0; i < 2; ++i) 453 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 454 455 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 456} 457 458static bool 459init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 460{ 461 struct pipe_resource tex_templ, *tex; 462 struct pipe_sampler_view sv_templ; 463 struct pipe_surface surf_templ; 464 unsigned i; 465 466 assert(idct && buffer); 467 468 memset(&tex_templ, 0, sizeof(tex_templ)); 469 tex_templ.target = PIPE_TEXTURE_3D; 470 tex_templ.format = PIPE_FORMAT_R16G16B16A16_SNORM; 471 tex_templ.width0 = idct->buffer_width / NR_RENDER_TARGETS; 472 tex_templ.height0 = idct->buffer_height / 4; 473 tex_templ.depth0 = NR_RENDER_TARGETS; 474 tex_templ.array_size = 1; 475 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; 476 tex_templ.usage = PIPE_USAGE_STATIC; 477 478 tex = idct->pipe->screen->resource_create(idct->pipe->screen, &tex_templ); 479 if (!tex) 480 goto error_tex; 481 482 memset(&sv_templ, 0, sizeof(sv_templ)); 483 u_sampler_view_default_template(&sv_templ, tex, tex->format); 484 buffer->sampler_views.individual.intermediate = 485 idct->pipe->create_sampler_view(idct->pipe, tex, &sv_templ); 486 if (!buffer->sampler_views.individual.intermediate) 487 goto error_sampler_view; 488 489 buffer->fb_state[0].width = tex->width0; 490 buffer->fb_state[0].height = tex->height0; 491 buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS; 492 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 493 memset(&surf_templ, 0, sizeof(surf_templ)); 494 surf_templ.format = tex->format; 495 surf_templ.u.tex.first_layer = i; 496 surf_templ.u.tex.last_layer = i; 497 surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; 498 buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface( 499 idct->pipe, tex, &surf_templ); 500 501 if (!buffer->fb_state[0].cbufs[i]) 502 goto error_surfaces; 503 } 504 505 buffer->viewport[0].scale[0] = tex->width0; 506 buffer->viewport[0].scale[1] = tex->height0; 507 508 pipe_resource_reference(&tex, NULL); 509 return true; 510 511error_surfaces: 512 for(i = 0; i < NR_RENDER_TARGETS; ++i) 513 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); 514 515 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); 516 517error_sampler_view: 518 pipe_resource_reference(&tex, NULL); 519 520error_tex: 521 return false; 522} 523 524static void 525cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 526{ 527 unsigned i; 528 529 assert(idct && buffer); 530 531 for(i = 0; i < NR_RENDER_TARGETS; ++i) 532 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); 533 534 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); 535} 536 537struct pipe_sampler_view * 538vl_idct_upload_matrix(struct pipe_context *pipe) 539{ 540 const float scale = sqrtf(SCALE_FACTOR_16_TO_9); 541 542 struct pipe_resource tex_templ, *matrix; 543 struct pipe_sampler_view sv_templ, *sv; 544 struct pipe_transfer *buf_transfer; 545 unsigned i, j, pitch; 546 float *f; 547 548 struct pipe_box rect = 549 { 550 0, 0, 0, 551 BLOCK_WIDTH / 4, 552 BLOCK_HEIGHT, 553 1 554 }; 555 556 assert(pipe); 557 558 memset(&tex_templ, 0, sizeof(tex_templ)); 559 tex_templ.target = PIPE_TEXTURE_2D; 560 tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 561 tex_templ.last_level = 0; 562 tex_templ.width0 = 2; 563 tex_templ.height0 = 8; 564 tex_templ.depth0 = 1; 565 tex_templ.array_size = 1; 566 tex_templ.usage = PIPE_USAGE_IMMUTABLE; 567 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; 568 tex_templ.flags = 0; 569 570 matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); 571 if (!matrix) 572 goto error_matrix; 573 574 buf_transfer = pipe->get_transfer 575 ( 576 pipe, matrix, 577 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 578 &rect 579 ); 580 if (!buf_transfer) 581 goto error_transfer; 582 583 pitch = buf_transfer->stride / sizeof(float); 584 585 f = pipe->transfer_map(pipe, buf_transfer); 586 if (!f) 587 goto error_map; 588 589 for(i = 0; i < BLOCK_HEIGHT; ++i) 590 for(j = 0; j < BLOCK_WIDTH; ++j) 591 // transpose and scale 592 f[i * pitch + j] = const_matrix[j][i] * scale; 593 594 pipe->transfer_unmap(pipe, buf_transfer); 595 pipe->transfer_destroy(pipe, buf_transfer); 596 597 memset(&sv_templ, 0, sizeof(sv_templ)); 598 u_sampler_view_default_template(&sv_templ, matrix, matrix->format); 599 sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); 600 pipe_resource_reference(&matrix, NULL); 601 if (!sv) 602 goto error_map; 603 604 return sv; 605 606error_map: 607 pipe->transfer_destroy(pipe, buf_transfer); 608 609error_transfer: 610 pipe_resource_reference(&matrix, NULL); 611 612error_matrix: 613 return NULL; 614} 615 616bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 617 unsigned buffer_width, unsigned buffer_height, 618 unsigned blocks_x, unsigned blocks_y, 619 struct pipe_sampler_view *matrix) 620{ 621 assert(idct && pipe && matrix); 622 623 idct->pipe = pipe; 624 idct->buffer_width = buffer_width; 625 idct->buffer_height = buffer_height; 626 idct->blocks_x = blocks_x; 627 idct->blocks_y = blocks_y; 628 pipe_sampler_view_reference(&idct->matrix, matrix); 629 630 if(!init_shaders(idct)) 631 return false; 632 633 if(!init_state(idct)) { 634 cleanup_shaders(idct); 635 return false; 636 } 637 638 return true; 639} 640 641void 642vl_idct_cleanup(struct vl_idct *idct) 643{ 644 cleanup_shaders(idct); 645 cleanup_state(idct); 646 647 pipe_sampler_view_reference(&idct->matrix, NULL); 648} 649 650bool 651vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, 652 struct pipe_sampler_view *source, struct pipe_surface *destination) 653{ 654 unsigned i; 655 656 assert(buffer); 657 assert(idct); 658 assert(source); 659 assert(destination); 660 661 pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); 662 pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); 663 pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix); 664 665 if (!init_intermediate(idct, buffer)) 666 return false; 667 668 /* init state */ 669 buffer->fb_state[1].width = destination->texture->width0; 670 buffer->fb_state[1].height = destination->texture->height0; 671 buffer->fb_state[1].nr_cbufs = 1; 672 pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination); 673 674 buffer->viewport[1].scale[0] = destination->texture->width0; 675 buffer->viewport[1].scale[1] = destination->texture->height0; 676 677 for(i = 0; i < 2; ++i) { 678 buffer->viewport[i].scale[2] = 1; 679 buffer->viewport[i].scale[3] = 1; 680 buffer->viewport[i].translate[0] = 0; 681 buffer->viewport[i].translate[1] = 0; 682 buffer->viewport[i].translate[2] = 0; 683 buffer->viewport[i].translate[3] = 0; 684 685 buffer->fb_state[i].zsbuf = NULL; 686 } 687 688 return true; 689} 690 691void 692vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) 693{ 694 unsigned i; 695 696 assert(idct && buffer); 697 698 for(i = 0; i < NR_RENDER_TARGETS; ++i) 699 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); 700 701 pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL); 702 703 cleanup_intermediate(idct, buffer); 704} 705 706void 707vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 708{ 709 struct pipe_resource *tex; 710 711 assert(idct && buffer); 712 713 tex = buffer->sampler_views.individual.source->texture; 714 715 struct pipe_box rect = 716 { 717 0, 0, 0, 718 tex->width0, 719 tex->height0, 720 1 721 }; 722 723 buffer->tex_transfer = idct->pipe->get_transfer 724 ( 725 idct->pipe, tex, 726 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 727 &rect 728 ); 729 730 buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer); 731} 732 733void 734vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block) 735{ 736 unsigned tex_pitch; 737 short *texels; 738 739 unsigned i; 740 741 assert(buffer); 742 assert(block); 743 744 tex_pitch = buffer->tex_transfer->stride / sizeof(short); 745 texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; 746 747 for (i = 0; i < BLOCK_HEIGHT; ++i) 748 memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short)); 749} 750 751void 752vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 753{ 754 assert(idct && buffer); 755 756 idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer); 757 idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer); 758} 759 760void 761vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances) 762{ 763 unsigned num_verts; 764 765 assert(idct); 766 assert(buffer); 767 768 if(num_instances > 0) { 769 num_verts = idct->blocks_x * idct->blocks_y * 4; 770 771 idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 772 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); 773 774 /* first stage */ 775 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]); 776 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]); 777 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); 778 idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); 779 idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); 780 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); 781 782 /* second stage */ 783 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); 784 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]); 785 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); 786 idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); 787 idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); 788 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); 789 } 790} 791