vl_idct.c revision be4de05c1093db27b3fca12b782055ab8a1eba13
1/************************************************************************** 2 * 3 * Copyright 2010 Christian König 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "vl_idct.h" 29#include "vl_vertex_buffers.h" 30#include "util/u_draw.h" 31#include <assert.h> 32#include <pipe/p_context.h> 33#include <pipe/p_screen.h> 34#include <util/u_inlines.h> 35#include <util/u_sampler.h> 36#include <util/u_format.h> 37#include <tgsi/tgsi_ureg.h> 38#include "vl_types.h" 39 40#define BLOCK_WIDTH 8 41#define BLOCK_HEIGHT 8 42 43#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) 44 45#define STAGE1_SCALE 4.0f 46#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE) 47 48#define NR_RENDER_TARGETS 4 49 50enum VS_INPUT 51{ 52 VS_I_RECT, 53 VS_I_VPOS, 54 55 NUM_VS_INPUTS 56}; 57 58enum VS_OUTPUT 59{ 60 VS_O_VPOS, 61 VS_O_L_ADDR0, 62 VS_O_L_ADDR1, 63 VS_O_R_ADDR0, 64 VS_O_R_ADDR1 65}; 66 67static const float const_matrix[8][8] = { 68 { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f }, 69 { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f }, 70 { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f }, 71 { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f }, 72 { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f }, 73 { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f }, 74 { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f }, 75 { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f } 76}; 77 78static void 79calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], 80 struct ureg_src tc, struct ureg_src start, bool right_side, 81 bool transposed, float size) 82{ 83 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 84 unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; 85 86 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 87 unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; 88 89 /* 90 * addr[0..1].(start) = right_side ? start.x : tc.x 91 * addr[0..1].(tc) = right_side ? tc.y : start.y 92 * addr[0..1].z = tc.z 93 * addr[1].(start) += 1.0f / scale 94 */ 95 ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); 96 ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); 97 ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc); 98 99 ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); 100 ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); 101 ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc); 102} 103 104static void * 105create_vert_shader(struct vl_idct *idct, bool matrix_stage) 106{ 107 struct ureg_program *shader; 108 struct ureg_src scale; 109 struct ureg_src vrect, vpos; 110 struct ureg_dst t_tex, t_start; 111 struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; 112 113 shader = ureg_create(TGSI_PROCESSOR_VERTEX); 114 if (!shader) 115 return NULL; 116 117 t_tex = ureg_DECL_temporary(shader); 118 t_start = ureg_DECL_temporary(shader); 119 120 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 121 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 122 123 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 124 125 o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 126 o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 127 128 o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); 129 o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); 130 131 /* 132 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) 133 * 134 * t_vpos = vpos + vrect 135 * o_vpos.xy = t_vpos * scale 136 * o_vpos.zw = vpos 137 * 138 * o_l_addr = calc_addr(...) 139 * o_r_addr = calc_addr(...) 140 * 141 */ 142 scale = ureg_imm2f(shader, 143 (float)BLOCK_WIDTH / idct->buffer_width, 144 (float)BLOCK_HEIGHT / idct->buffer_height); 145 146 ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); 147 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); 148 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); 149 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); 150 151 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), 152 ureg_scalar(vrect, TGSI_SWIZZLE_X), 153 ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS)); 154 155 ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); 156 157 if(matrix_stage) { 158 calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); 159 calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); 160 } else { 161 calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); 162 calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); 163 } 164 165 ureg_release_temporary(shader, t_tex); 166 ureg_release_temporary(shader, t_start); 167 168 ureg_END(shader); 169 170 return ureg_create_shader_and_destroy(shader, idct->pipe); 171} 172 173static void 174increment_addr(struct ureg_program *shader, struct ureg_dst addr[2], 175 bool right_side, bool transposed, float size) 176{ 177 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 178 179 /* addr[0..1]++ */ 180 ureg_ADD(shader, ureg_writemask(addr[0], wm_tc), 181 ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size)); 182 ureg_ADD(shader, ureg_writemask(addr[1], wm_tc), 183 ureg_src(addr[1]), ureg_imm1f(shader, 1.0f / size)); 184} 185 186static void 187fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) 188{ 189 m[0] = ureg_DECL_temporary(shader); 190 m[1] = ureg_DECL_temporary(shader); 191 192 ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); 193 ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); 194} 195 196static void 197matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) 198{ 199 struct ureg_dst tmp; 200 201 tmp = ureg_DECL_temporary(shader); 202 203 /* 204 * tmp.xy = dot4(m[0][0..1], m[1][0..1]) 205 * dst = tmp.x + tmp.y 206 */ 207 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); 208 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); 209 ureg_ADD(shader, dst, 210 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), 211 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 212 213 ureg_release_temporary(shader, tmp); 214} 215 216static void * 217create_matrix_frag_shader(struct vl_idct *idct) 218{ 219 struct ureg_program *shader; 220 221 struct ureg_src l_addr[2], r_addr[2], saddr[2]; 222 223 struct ureg_dst addr[2], l[4][2], r[2]; 224 struct ureg_dst fragment[NR_RENDER_TARGETS]; 225 226 unsigned i, j; 227 228 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 229 if (!shader) 230 return NULL; 231 232 addr[0] = ureg_DECL_temporary(shader); 233 addr[1] = ureg_DECL_temporary(shader); 234 235 saddr[0] = ureg_src(addr[0]); 236 saddr[1] = ureg_src(addr[1]); 237 238 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 239 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 240 241 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 242 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 243 244 for (i = 0; i < NR_RENDER_TARGETS; ++i) 245 fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); 246 247 for (i = 0; i < 4; ++i) { 248 if(i == 0) { 249 ureg_MOV(shader, addr[0], l_addr[0]); 250 ureg_MOV(shader, addr[1], l_addr[1]); 251 } else 252 increment_addr(shader, addr, false, false, idct->buffer_height); 253 254 fetch_four(shader, l[i], saddr, ureg_DECL_sampler(shader, 1)); 255 } 256 257 for (i = 0; i < NR_RENDER_TARGETS; ++i) { 258 if(i == 0) { 259 ureg_MOV(shader, addr[0], r_addr[0]); 260 ureg_MOV(shader, addr[1], r_addr[1]); 261 } else 262 increment_addr(shader, addr, true, true, BLOCK_HEIGHT); 263 264 fetch_four(shader, r, saddr, ureg_DECL_sampler(shader, 0)); 265 266 for (j = 0; j < 4; ++j) { 267 matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); 268 } 269 ureg_release_temporary(shader, r[0]); 270 ureg_release_temporary(shader, r[1]); 271 } 272 273 for (i = 0; i < 4; ++i) { 274 ureg_release_temporary(shader, l[i][0]); 275 ureg_release_temporary(shader, l[i][1]); 276 } 277 ureg_release_temporary(shader, addr[0]); 278 ureg_release_temporary(shader, addr[1]); 279 280 ureg_END(shader); 281 282 return ureg_create_shader_and_destroy(shader, idct->pipe); 283} 284 285static void * 286create_transpose_frag_shader(struct vl_idct *idct) 287{ 288 struct ureg_program *shader; 289 290 struct ureg_src l_addr[2], r_addr[2]; 291 292 struct ureg_dst l[2], r[2]; 293 struct ureg_dst tmp, fragment; 294 295 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 296 if (!shader) 297 return NULL; 298 299 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 300 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 301 302 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 303 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 304 305 fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0)); 306 fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1)); 307 308 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 309 310 tmp = ureg_DECL_temporary(shader); 311 matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r); 312 ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE)); 313 314 ureg_release_temporary(shader, tmp); 315 ureg_release_temporary(shader, l[0]); 316 ureg_release_temporary(shader, l[1]); 317 ureg_release_temporary(shader, r[0]); 318 ureg_release_temporary(shader, r[1]); 319 320 ureg_END(shader); 321 322 return ureg_create_shader_and_destroy(shader, idct->pipe); 323} 324 325static bool 326init_shaders(struct vl_idct *idct) 327{ 328 idct->matrix_vs = create_vert_shader(idct, true); 329 idct->matrix_fs = create_matrix_frag_shader(idct); 330 331 idct->transpose_vs = create_vert_shader(idct, false); 332 idct->transpose_fs = create_transpose_frag_shader(idct); 333 334 return 335 idct->matrix_vs != NULL && 336 idct->matrix_fs != NULL && 337 idct->transpose_vs != NULL && 338 idct->transpose_fs != NULL; 339} 340 341static void 342cleanup_shaders(struct vl_idct *idct) 343{ 344 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); 345 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); 346 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); 347 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); 348} 349 350static bool 351init_state(struct vl_idct *idct) 352{ 353 struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; 354 struct pipe_sampler_state sampler; 355 struct pipe_rasterizer_state rs_state; 356 unsigned i; 357 358 assert(idct); 359 360 idct->quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks); 361 362 if(idct->quad.buffer == NULL) 363 return false; 364 365 for (i = 0; i < 4; ++i) { 366 memset(&sampler, 0, sizeof(sampler)); 367 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 368 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 369 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 370 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 371 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 372 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 373 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 374 sampler.compare_func = PIPE_FUNC_ALWAYS; 375 sampler.normalized_coords = 1; 376 /*sampler.shadow_ambient = ; */ 377 /*sampler.lod_bias = ; */ 378 sampler.min_lod = 0; 379 /*sampler.max_lod = ; */ 380 /*sampler.border_color[0] = ; */ 381 /*sampler.max_anisotropy = ; */ 382 idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 383 } 384 385 memset(&rs_state, 0, sizeof(rs_state)); 386 /*rs_state.sprite_coord_enable */ 387 rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; 388 rs_state.point_quad_rasterization = true; 389 rs_state.point_size = BLOCK_WIDTH; 390 rs_state.gl_rasterization_rules = false; 391 idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); 392 393 vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); 394 395 /* Pos element */ 396 vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT; 397 398 idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); 399 idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems); 400 401 return true; 402} 403 404static void 405cleanup_state(struct vl_idct *idct) 406{ 407 unsigned i; 408 409 for (i = 0; i < 4; ++i) 410 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]); 411 412 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 413 idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 414} 415 416static bool 417init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) 418{ 419 struct pipe_resource template; 420 struct pipe_sampler_view sampler_view; 421 unsigned i; 422 423 assert(idct && buffer); 424 425 /* create textures */ 426 memset(&template, 0, sizeof(struct pipe_resource)); 427 template.last_level = 0; 428 template.depth0 = 1; 429 template.bind = PIPE_BIND_SAMPLER_VIEW; 430 template.flags = 0; 431 432 template.target = PIPE_TEXTURE_2D; 433 template.format = PIPE_FORMAT_R16G16B16A16_SNORM; 434 template.width0 = idct->buffer_width / 4; 435 template.height0 = idct->buffer_height; 436 template.depth0 = 1; 437 template.usage = PIPE_USAGE_STREAM; 438 buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template); 439 440 template.target = PIPE_TEXTURE_3D; 441 template.format = PIPE_FORMAT_R16G16B16A16_SNORM; 442 template.width0 = idct->buffer_width / NR_RENDER_TARGETS; 443 template.height0 = idct->buffer_height / 4; 444 template.depth0 = NR_RENDER_TARGETS; 445 template.usage = PIPE_USAGE_STATIC; 446 buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template); 447 448 for (i = 0; i < 4; ++i) { 449 if(buffer->textures.all[i] == NULL) 450 return false; /* a texture failed to allocate */ 451 452 u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format); 453 buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view); 454 } 455 456 return true; 457} 458 459static void 460cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) 461{ 462 unsigned i; 463 464 assert(idct && buffer); 465 466 for (i = 0; i < 4; ++i) { 467 pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL); 468 pipe_resource_reference(&buffer->textures.all[i], NULL); 469 } 470} 471 472static bool 473init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 474{ 475 assert(idct && buffer); 476 477 buffer->vertex_bufs.individual.quad.stride = idct->quad.stride; 478 buffer->vertex_bufs.individual.quad.max_index = idct->quad.max_index; 479 buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset; 480 pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer); 481 482 buffer->vertex_bufs.individual.pos = vl_vb_init( 483 &buffer->blocks, idct->pipe, idct->max_blocks, 2, 484 idct->vertex_buffer_stride); 485 486 if(buffer->vertex_bufs.individual.pos.buffer == NULL) 487 return false; 488 489 return true; 490} 491 492static void 493cleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 494{ 495 assert(idct && buffer); 496 497 pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL); 498 pipe_resource_reference(&buffer->vertex_bufs.individual.pos.buffer, NULL); 499 500 vl_vb_cleanup(&buffer->blocks); 501} 502 503struct pipe_resource * 504vl_idct_upload_matrix(struct pipe_context *pipe) 505{ 506 struct pipe_resource template, *matrix; 507 struct pipe_transfer *buf_transfer; 508 unsigned i, j, pitch; 509 float *f; 510 511 struct pipe_box rect = 512 { 513 0, 0, 0, 514 BLOCK_WIDTH / 4, 515 BLOCK_HEIGHT, 516 1 517 }; 518 519 memset(&template, 0, sizeof(struct pipe_resource)); 520 template.target = PIPE_TEXTURE_2D; 521 template.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 522 template.last_level = 0; 523 template.width0 = 2; 524 template.height0 = 8; 525 template.depth0 = 1; 526 template.usage = PIPE_USAGE_IMMUTABLE; 527 template.bind = PIPE_BIND_SAMPLER_VIEW; 528 template.flags = 0; 529 530 matrix = pipe->screen->resource_create(pipe->screen, &template); 531 532 /* matrix */ 533 buf_transfer = pipe->get_transfer 534 ( 535 pipe, matrix, 536 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 537 &rect 538 ); 539 pitch = buf_transfer->stride / sizeof(float); 540 541 f = pipe->transfer_map(pipe, buf_transfer); 542 for(i = 0; i < BLOCK_HEIGHT; ++i) 543 for(j = 0; j < BLOCK_WIDTH; ++j) 544 // transpose and scale 545 f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE; 546 547 pipe->transfer_unmap(pipe, buf_transfer); 548 pipe->transfer_destroy(pipe, buf_transfer); 549 550 return matrix; 551} 552 553bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 554 unsigned buffer_width, unsigned buffer_height, 555 struct pipe_resource *matrix) 556{ 557 assert(idct && pipe && matrix); 558 559 idct->pipe = pipe; 560 idct->buffer_width = buffer_width; 561 idct->buffer_height = buffer_height; 562 pipe_resource_reference(&idct->matrix, matrix); 563 564 idct->max_blocks = 565 align(buffer_width, BLOCK_WIDTH) / BLOCK_WIDTH * 566 align(buffer_height, BLOCK_HEIGHT) / BLOCK_HEIGHT; 567 568 if(!init_shaders(idct)) 569 return false; 570 571 if(!init_state(idct)) { 572 cleanup_shaders(idct); 573 return false; 574 } 575 576 return true; 577} 578 579void 580vl_idct_cleanup(struct vl_idct *idct) 581{ 582 cleanup_shaders(idct); 583 cleanup_state(idct); 584 585 pipe_resource_reference(&idct->matrix, NULL); 586} 587 588bool 589vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst) 590{ 591 struct pipe_surface template; 592 593 unsigned i; 594 595 assert(buffer); 596 assert(idct); 597 assert(dst); 598 599 pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix); 600 pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix); 601 pipe_resource_reference(&buffer->destination, dst); 602 603 if (!init_textures(idct, buffer)) 604 return false; 605 606 if (!init_vertex_buffers(idct, buffer)) 607 return false; 608 609 /* init state */ 610 buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0; 611 buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0; 612 613 buffer->viewport[1].scale[0] = buffer->destination->width0; 614 buffer->viewport[1].scale[1] = buffer->destination->height0; 615 616 buffer->fb_state[0].width = buffer->textures.individual.intermediate->width0; 617 buffer->fb_state[0].height = buffer->textures.individual.intermediate->height0; 618 619 buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS; 620 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 621 memset(&template, 0, sizeof(template)); 622 template.format = buffer->textures.individual.intermediate->format; 623 template.u.tex.first_layer = i; 624 template.u.tex.last_layer = i; 625 template.usage = PIPE_BIND_RENDER_TARGET; 626 buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface( 627 idct->pipe, buffer->textures.individual.intermediate, 628 &template); 629 } 630 631 buffer->fb_state[1].width = buffer->destination->width0; 632 buffer->fb_state[1].height = buffer->destination->height0; 633 634 buffer->fb_state[1].nr_cbufs = 1; 635 636 memset(&template, 0, sizeof(template)); 637 template.format = buffer->destination->format; 638 template.usage = PIPE_BIND_RENDER_TARGET; 639 buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface( 640 idct->pipe, buffer->destination, &template); 641 642 for(i = 0; i < 2; ++i) { 643 buffer->viewport[i].scale[2] = 1; 644 buffer->viewport[i].scale[3] = 1; 645 buffer->viewport[i].translate[0] = 0; 646 buffer->viewport[i].translate[1] = 0; 647 buffer->viewport[i].translate[2] = 0; 648 buffer->viewport[i].translate[3] = 0; 649 650 buffer->fb_state[i].zsbuf = NULL; 651 } 652 653 return true; 654} 655 656void 657vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) 658{ 659 unsigned i; 660 661 assert(buffer); 662 663 for(i = 0; i < NR_RENDER_TARGETS; ++i) { 664 idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[0].cbufs[i]); 665 } 666 667 idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[1].cbufs[0]); 668 669 cleanup_textures(idct, buffer); 670 cleanup_vertex_buffers(idct, buffer); 671} 672 673void 674vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 675{ 676 assert(idct); 677 678 struct pipe_box rect = 679 { 680 0, 0, 0, 681 buffer->textures.individual.source->width0, 682 buffer->textures.individual.source->height0, 683 1 684 }; 685 686 buffer->tex_transfer = idct->pipe->get_transfer 687 ( 688 idct->pipe, buffer->textures.individual.source, 689 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 690 &rect 691 ); 692 693 buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer); 694 695 vl_vb_map(&buffer->blocks, idct->pipe); 696} 697 698void 699vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block) 700{ 701 struct vertex2f v; 702 unsigned tex_pitch; 703 short *texels; 704 705 unsigned i; 706 707 assert(buffer); 708 709 tex_pitch = buffer->tex_transfer->stride / sizeof(short); 710 texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; 711 712 for (i = 0; i < BLOCK_HEIGHT; ++i) 713 memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short)); 714 715 v.x = x; 716 v.y = y; 717 vl_vb_add_block(&buffer->blocks, (float*)&v); 718} 719 720void 721vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) 722{ 723 assert(idct && buffer); 724 725 idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer); 726 idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer); 727 vl_vb_unmap(&buffer->blocks, idct->pipe); 728} 729 730void 731vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer) 732{ 733 unsigned num_verts; 734 735 assert(idct); 736 737 num_verts = vl_vb_restart(&buffer->blocks); 738 739 if(num_verts > 0) { 740 741 idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 742 idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all); 743 idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); 744 745 /* first stage */ 746 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]); 747 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]); 748 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); 749 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]); 750 idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); 751 idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); 752 util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts); 753 754 /* second stage */ 755 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); 756 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]); 757 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); 758 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]); 759 idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); 760 idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); 761 util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts); 762 } 763} 764