vl_idct.c revision 4ea38176028a6ecfc6ed195f64429b6b34279359
1/************************************************************************** 2 * 3 * Copyright 2010 Christian König 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "vl_idct.h" 29#include "vl_vertex_buffers.h" 30#include "util/u_draw.h" 31#include <assert.h> 32#include <pipe/p_context.h> 33#include <pipe/p_screen.h> 34#include <util/u_inlines.h> 35#include <util/u_sampler.h> 36#include <util/u_format.h> 37#include <tgsi/tgsi_ureg.h> 38#include "vl_types.h" 39 40#define BLOCK_WIDTH 8 41#define BLOCK_HEIGHT 8 42 43#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) 44 45#define NR_RENDER_TARGETS 4 46 47enum VS_INPUT 48{ 49 VS_I_RECT, 50 VS_I_VPOS, 51 52 NUM_VS_INPUTS 53}; 54 55enum VS_OUTPUT 56{ 57 VS_O_VPOS, 58 VS_O_L_ADDR0, 59 VS_O_L_ADDR1, 60 VS_O_R_ADDR0, 61 VS_O_R_ADDR1 62}; 63 64static const float const_matrix[8][8] = { 65 { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f }, 66 { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f }, 67 { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f }, 68 { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f }, 69 { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f }, 70 { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f }, 71 { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f }, 72 { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f } 73}; 74 75static void 76calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], 77 struct ureg_src tc, struct ureg_src start, bool right_side, 78 bool transposed, float size) 79{ 80 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 81 unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; 82 83 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 84 unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; 85 86 /* 87 * addr[0..1].(start) = right_side ? start.x : tc.x 88 * addr[0..1].(tc) = right_side ? tc.y : start.y 89 * addr[0..1].z = tc.z 90 * addr[1].(start) += 1.0f / scale 91 */ 92 ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); 93 ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); 94 ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc); 95 96 ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); 97 ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); 98 ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc); 99} 100 101static void * 102create_vert_shader(struct vl_idct *idct, bool matrix_stage) 103{ 104 struct ureg_program *shader; 105 struct ureg_src scale; 106 struct ureg_src vrect, vpos; 107 struct ureg_dst t_tex, t_start; 108 struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; 109 110 shader = ureg_create(TGSI_PROCESSOR_VERTEX); 111 if (!shader) 112 return NULL; 113 114 t_tex = ureg_DECL_temporary(shader); 115 t_start = ureg_DECL_temporary(shader); 116 117 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 118 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 119 120 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 121 122 o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 123 o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 124 125 o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); 126 o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); 127 128 /* 129 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) 130 * 131 * t_vpos = vpos + vrect 132 * o_vpos.xy = t_vpos * scale 133 * o_vpos.zw = vpos 134 * 135 * o_l_addr = calc_addr(...) 136 * o_r_addr = calc_addr(...) 137 * 138 */ 139 scale = ureg_imm2f(shader, 140 (float)BLOCK_WIDTH / idct->buffer_width, 141 (float)BLOCK_HEIGHT / idct->buffer_height); 142 143 ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); 144 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); 145 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), 146 ureg_scalar(vrect, TGSI_SWIZZLE_X), 147 ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS)); 148 149 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); 150 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); 151 152 ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); 153 154 if(matrix_stage) { 155 calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); 156 calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); 157 } else { 158 calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); 159 calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); 160 } 161 162 ureg_release_temporary(shader, t_tex); 163 ureg_release_temporary(shader, t_start); 164 165 ureg_END(shader); 166 167 return ureg_create_shader_and_destroy(shader, idct->pipe); 168} 169 170static void 171increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], 172 struct ureg_src saddr[2], bool right_side, bool transposed, 173 int pos, float size) 174{ 175 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 176 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 177 178 /* 179 * daddr[0..1].(start) = saddr[0..1].(start) 180 * daddr[0..1].(tc) = saddr[0..1].(tc) 181 */ 182 183 ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); 184 ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); 185 ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); 186 ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); 187} 188 189static void 190fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) 191{ 192 ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); 193 ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); 194} 195 196static void 197matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) 198{ 199 struct ureg_dst tmp; 200 201 tmp = ureg_DECL_temporary(shader); 202 203 /* 204 * tmp.xy = dot4(m[0][0..1], m[1][0..1]) 205 * dst = tmp.x + tmp.y 206 */ 207 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); 208 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); 209 ureg_ADD(shader, dst, 210 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), 211 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 212 213 ureg_release_temporary(shader, tmp); 214} 215 216static void * 217create_matrix_frag_shader(struct vl_idct *idct) 218{ 219 struct ureg_program *shader; 220 221 struct ureg_src l_addr[2], r_addr[2]; 222 223 struct ureg_dst l[4][2], r[2]; 224 struct ureg_dst fragment[NR_RENDER_TARGETS]; 225 226 unsigned i, j; 227 228 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 229 if (!shader) 230 return NULL; 231 232 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 233 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 234 235 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 236 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 237 238 for (i = 0; i < NR_RENDER_TARGETS; ++i) 239 fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); 240 241 for (i = 0; i < 4; ++i) { 242 l[i][0] = ureg_DECL_temporary(shader); 243 l[i][1] = ureg_DECL_temporary(shader); 244 } 245 246 r[0] = ureg_DECL_temporary(shader); 247 r[1] = ureg_DECL_temporary(shader); 248 249 for (i = 1; i < 4; ++i) { 250 increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height); 251 } 252 253 for (i = 0; i < 4; ++i) { 254 struct ureg_src s_addr[2]; 255 s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]); 256 s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]); 257 fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1)); 258 } 259 260 for (i = 0; i < NR_RENDER_TARGETS; ++i) { 261 if(i > 0) 262 increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT); 263 264 struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) }; 265 s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]); 266 s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]); 267 fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0)); 268 269 for (j = 0; j < 4; ++j) { 270 matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); 271 } 272 } 273 274 for (i = 0; i < 4; ++i) { 275 ureg_release_temporary(shader, l[i][0]); 276 ureg_release_temporary(shader, l[i][1]); 277 } 278 ureg_release_temporary(shader, r[0]); 279 ureg_release_temporary(shader, r[1]); 280 281 ureg_END(shader); 282 283 return ureg_create_shader_and_destroy(shader, idct->pipe); 284} 285 286static void * 287create_transpose_frag_shader(struct vl_idct *idct) 288{ 289 struct ureg_program *shader; 290 291 struct ureg_src l_addr[2], r_addr[2]; 292 293 struct ureg_dst l[2], r[2]; 294 struct ureg_dst fragment; 295 296 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 297 if (!shader) 298 return NULL; 299 300 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 301 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 302 303 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 304 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 305 306 l[0] = ureg_DECL_temporary(shader); 307 l[1] = ureg_DECL_temporary(shader); 308 r[0] = ureg_DECL_temporary(shader); 309 r[1] = ureg_DECL_temporary(shader); 310 311 fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0)); 312 fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1)); 313 314 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 315 316 matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r); 317 318 ureg_release_temporary(shader, l[0]); 319 ureg_release_temporary(shader, l[1]); 320 ureg_release_temporary(shader, r[0]); 321 ureg_release_temporary(shader, r[1]); 322 323 ureg_END(shader); 324 325 return ureg_create_shader_and_destroy(shader, idct->pipe); 326} 327 328static bool 329init_shaders(struct vl_idct *idct) 330{ 331 idct->matrix_vs = create_vert_shader(idct, true); 332 idct->matrix_fs = create_matrix_frag_shader(idct); 333 334 idct->transpose_vs = create_vert_shader(idct, false); 335 idct->transpose_fs = create_transpose_frag_shader(idct); 336 337 return 338 idct->matrix_vs != NULL && 339 idct->matrix_fs != NULL && 340 idct->transpose_vs != NULL && 341 idct->transpose_fs != NULL; 342} 343 344static void 345cleanup_shaders(struct vl_idct *idct) 346{ 347 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 348 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 349 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 350 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); 351} 352 353static bool 354init_state(struct vl_idct *idct) 355{ 356 struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; 357 struct pipe_sampler_state sampler; 358 struct pipe_rasterizer_state rs_state; 359 unsigned i; 360 361 assert(idct); 362 363 idct->quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks); 364 365 if(idct->quad.buffer == NULL) 366 return false; 367 368 for (i = 0; i < 4; ++i) { 369 memset(&sampler, 0, sizeof(sampler)); 370 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 371 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 372 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 373 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 374 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 375 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 376 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 377 sampler.compare_func = PIPE_FUNC_ALWAYS; 378 sampler.normalized_coords = 1; 379 /*sampler.shadow_ambient = ; */ 380 /*sampler.lod_bias = ; */ 381 sampler.min_lod = 0; 382 /*sampler.max_lod = ; */ 383 /*sampler.border_color[0] = ; */ 384 /*sampler.max_anisotropy = ; */ 385 idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 386 } 387 388 memset(&rs_state, 0, sizeof(rs_state)); 389 /*rs_state.sprite_coord_enable */ 390 rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; 391 rs_state.point_quad_rasterization = true; 392 rs_state.point_size = BLOCK_WIDTH; 393 rs_state.gl_rasterization_rules = false; 394 idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); 395 396 vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); 397 398 /* Pos element */ 399 vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; 400 401 idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); 402 idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems); 403 404 return true; 405} 406 407static void 408cleanup_state(struct vl_idct *idct) 409{ 410 unsigned i; 411 412 for (i = 0; i < 4; ++i) 413 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]); 414 415 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 416 idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 417} 418 419static bool 420init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) 421{ 422 struct pipe_resource template; 423 struct pipe_sampler_view sampler_view; 424 unsigned i; 425 426 assert(idct && buffer); 427 428 /* create textures */ 429 memset(&template, 0, sizeof(struct pipe_resource)); 430 template.last_level = 0; 431 template.bind = PIPE_BIND_SAMPLER_VIEW; 432 template.flags = 0; 433 434 template.target = PIPE_TEXTURE_2D; 435 template.format = PIPE_FORMAT_R16G16B16A16_SNORM; 436 template.width0 = idct->buffer_width / 4; 437 template.height0 = idct->buffer_height; 438 template.depth0 = 1; 439 template.array_size = 1; 440 template.usage = PIPE_USAGE_STREAM; 441 buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template); 442 443 template.target = PIPE_TEXTURE_3D; 444 template.format = PIPE_FORMAT_R16G16B16A16_SNORM; 445 template.width0 = idct->buffer_width / NR_RENDER_TARGETS; 446 template.height0 = idct->buffer_height / 4; 447 template.depth0 = NR_RENDER_TARGETS; 448 template.usage = PIPE_USAGE_STATIC; 449 buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template); 450 451 for (i = 0; i < 4; ++i) { 452 if(buffer->textures.all[i] == NULL) 453 return false; /* a texture failed to allocate */ 454 455 u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format); 456 buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view); 457 } 458 459 return true; 460} 461 462static void 463cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) 464{ 465 unsigned i; 466 467 assert(idct && buffer); 468 469 for (i = 0; i < 4; ++i) { 470 pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL); 471 pipe_resource_reference(&buffer->textures.all[i], NULL); 472 } 473} 474 475static bool 476init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 477{ 478 assert(idct && buffer); 479 480 buffer->vertex_bufs.individual.quad.stride = idct->quad.stride; 481 buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset; 482 pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer); 483 484 buffer->vertex_bufs.individual.pos = vl_vb_init( 485 &buffer->blocks, idct->pipe, idct->max_blocks, 486 idct->vertex_buffer_stride); 487 488 if(buffer->vertex_bufs.individual.pos.buffer == NULL) 489 return false; 490 491 return true; 492} 493 494static void 495cleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 496{ 497 assert(idct && buffer); 498 499 pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL); 500 pipe_resource_reference(&buffer->vertex_bufs.individual.pos.buffer, NULL); 501 502 vl_vb_cleanup(&buffer->blocks); 503} 504 505struct pipe_resource * 506vl_idct_upload_matrix(struct pipe_context *pipe) 507{ 508 struct pipe_resource template, *matrix; 509 struct pipe_transfer *buf_transfer; 510 unsigned i, j, pitch; 511 float *f; 512 513 struct pipe_box rect = 514 { 515 0, 0, 0, 516 BLOCK_WIDTH / 4, 517 BLOCK_HEIGHT, 518 1 519 }; 520 521 memset(&template, 0, sizeof(struct pipe_resource)); 522 template.target = PIPE_TEXTURE_2D; 523 template.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 524 template.last_level = 0; 525 template.width0 = 2; 526 template.height0 = 8; 527 template.depth0 = 1; 528 template.array_size = 1; 529 template.usage = PIPE_USAGE_IMMUTABLE; 530 template.bind = PIPE_BIND_SAMPLER_VIEW; 531 template.flags = 0; 532 533 matrix = pipe->screen->resource_create(pipe->screen, &template); 534 535 /* matrix */ 536 buf_transfer = pipe->get_transfer 537 ( 538 pipe, matrix, 539 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 540 &rect 541 ); 542 pitch = buf_transfer->stride / sizeof(float); 543 544 f = pipe->transfer_map(pipe, buf_transfer); 545 for(i = 0; i < BLOCK_HEIGHT; ++i) 546 for(j = 0; j < BLOCK_WIDTH; ++j) 547 // transpose and scale 548 f[i * pitch + j] = const_matrix[j][i] * sqrtf(SCALE_FACTOR_16_TO_9); 549 550 pipe->transfer_unmap(pipe, buf_transfer); 551 pipe->transfer_destroy(pipe, buf_transfer); 552 553 return matrix; 554} 555 556bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 557 unsigned buffer_width, unsigned buffer_height, 558 struct pipe_resource *matrix) 559{ 560 assert(idct && pipe && matrix); 561 562 idct->pipe = pipe; 563 idct->buffer_width = buffer_width; 564 idct->buffer_height = buffer_height; 565 pipe_resource_reference(&idct->matrix, matrix); 566 567 idct->max_blocks = 568 align(buffer_width, BLOCK_WIDTH) / BLOCK_WIDTH * 569 align(buffer_height, BLOCK_HEIGHT) / BLOCK_HEIGHT; 570 571 if(!init_shaders(idct)) 572 return false; 573 574 if(!init_state(idct)) { 575 cleanup_shaders(idct); 576 return false; 577 } 578 579 return true; 580} 581 582void 583vl_idct_cleanup(struct vl_idct *idct) 584{ 585 cleanup_shaders(idct); 586 cleanup_state(idct); 587 588 pipe_resource_reference(&idct->matrix, NULL); 589} 590 591bool 592vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst) 593{ 594 struct pipe_surface template; 595 596 unsigned i; 597 598 assert(buffer); 599 assert(idct); 600 assert(dst); 601 602 pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix); 603 pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix); 604 pipe_resource_reference(&buffer->destination, dst); 605 606 if (!init_textures(idct, buffer)) 607 return false; 608 609 if (!init_vertex_buffers(idct, buffer)) 610 return false; 611 612 /* init state */ 613 buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0; 614 buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0; 615 616 buffer->viewport[1].scale[0] = buffer->destination->width0; 617 buffer->viewport[1].scale[1] = buffer->destination->height0; 618 619 buffer->fb_state[0].width = buffer->textures.individual.intermediate->width0; 620 buffer->fb_state[0].height = buffer->textures.individual.intermediate->height0; 621 622 buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS; 623 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 624 memset(&template, 0, sizeof(template)); 625 template.format = buffer->textures.individual.intermediate->format; 626 template.u.tex.first_layer = i; 627 template.u.tex.last_layer = i; 628 template.usage = PIPE_BIND_RENDER_TARGET; 629 buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface( 630 idct->pipe, buffer->textures.individual.intermediate, 631 &template); 632 } 633 634 buffer->fb_state[1].width = buffer->destination->width0; 635 buffer->fb_state[1].height = buffer->destination->height0; 636 637 buffer->fb_state[1].nr_cbufs = 1; 638 639 memset(&template, 0, sizeof(template)); 640 template.format = buffer->destination->format; 641 template.usage = PIPE_BIND_RENDER_TARGET; 642 buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface( 643 idct->pipe, buffer->destination, &template); 644 645 for(i = 0; i < 2; ++i) { 646 buffer->viewport[i].scale[2] = 1; 647 buffer->viewport[i].scale[3] = 1; 648 buffer->viewport[i].translate[0] = 0; 649 buffer->viewport[i].translate[1] = 0; 650 buffer->viewport[i].translate[2] = 0; 651 buffer->viewport[i].translate[3] = 0; 652 653 buffer->fb_state[i].zsbuf = NULL; 654 } 655 656 return true; 657} 658 659void 660vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) 661{ 662 unsigned i; 663 664 assert(buffer); 665 666 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 667 idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[0].cbufs[i]); 668 } 669 670 idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[1].cbufs[0]); 671 672 cleanup_textures(idct, buffer); 673 cleanup_vertex_buffers(idct, buffer); 674} 675 676void 677vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 678{ 679 assert(idct); 680 681 struct pipe_box rect = 682 { 683 0, 0, 0, 684 buffer->textures.individual.source->width0, 685 buffer->textures.individual.source->height0, 686 1 687 }; 688 689 buffer->tex_transfer = idct->pipe->get_transfer 690 ( 691 idct->pipe, buffer->textures.individual.source, 692 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 693 &rect 694 ); 695 696 buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer); 697 698 vl_vb_map(&buffer->blocks, idct->pipe); 699} 700 701void 702vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block) 703{ 704 struct vertex2s v; 705 unsigned tex_pitch; 706 short *texels; 707 708 unsigned i; 709 710 assert(buffer); 711 712 tex_pitch = buffer->tex_transfer->stride / sizeof(short); 713 texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; 714 715 for (i = 0; i < BLOCK_HEIGHT; ++i) 716 memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short)); 717 718 v.x = x; 719 v.y = y; 720 vl_vb_add_block(&buffer->blocks, &v); 721} 722 723void 724vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 725{ 726 assert(idct && buffer); 727 728 idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer); 729 idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer); 730 vl_vb_unmap(&buffer->blocks, idct->pipe); 731} 732 733void 734vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer) 735{ 736 unsigned num_verts; 737 738 assert(idct); 739 740 num_verts = vl_vb_restart(&buffer->blocks); 741 742 if(num_verts > 0) { 743 744 idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 745 idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all); 746 idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 747 748 /* first stage */ 749 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]); 750 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]); 751 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); 752 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]); 753 idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); 754 idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); 755 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts); 756 757 /* second stage */ 758 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); 759 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]); 760 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); 761 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]); 762 idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); 763 idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); 764 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts); 765 } 766} 767