vl_idct.c revision 7c48575402e8c384db2fab24e4dd0fc72bef0451
1/************************************************************************** 2 * 3 * Copyright 2010 Christian König 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <assert.h> 29 30#include <pipe/p_context.h> 31#include <pipe/p_screen.h> 32 33#include <util/u_draw.h> 34#include <util/u_sampler.h> 35 36#include <tgsi/tgsi_ureg.h> 37 38#include "vl_defines.h" 39#include "vl_types.h" 40#include "vl_vertex_buffers.h" 41#include "vl_idct.h" 42 43enum VS_OUTPUT 44{ 45 VS_O_VPOS, 46 VS_O_L_ADDR0, 47 VS_O_L_ADDR1, 48 VS_O_R_ADDR0, 49 VS_O_R_ADDR1 50}; 51 52/** 53 * The DCT matrix stored as hex representation of floats. Equal to the following equation: 54 * for (i = 0; i < 8; ++i) 55 * for (j = 0; j < 8; ++j) 56 * if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f); 57 * else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f)); 58 */ 59static const uint32_t const_matrix[8][8] = { 60 { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 }, 61 { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf }, 62 { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f }, 63 { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 }, 64 { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 }, 65 { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 }, 66 { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 }, 67 { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 }, 68}; 69 70static void 71calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], 72 struct ureg_src tc, struct ureg_src start, bool right_side, 73 bool transposed, float size) 74{ 75 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 76 unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; 77 78 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 79 unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; 80 81 /* 82 * addr[0..1].(start) = right_side ? start.x : tc.x 83 * addr[0..1].(tc) = right_side ? tc.y : start.y 84 * addr[0..1].z = tc.z 85 * addr[1].(start) += 1.0f / scale 86 */ 87 ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); 88 ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); 89 90 ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); 91 ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); 92} 93 94static void 95increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], 96 struct ureg_src saddr[2], bool right_side, bool transposed, 97 int pos, float size) 98{ 99 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 100 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 101 102 /* 103 * daddr[0..1].(start) = saddr[0..1].(start) 104 * daddr[0..1].(tc) = saddr[0..1].(tc) 105 */ 106 107 ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); 108 ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); 109 ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); 110 ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); 111} 112 113static void 114fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], 115 struct ureg_src sampler, bool resource3d) 116{ 117 ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler); 118 ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler); 119} 120 121static void 122matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) 123{ 124 struct ureg_dst tmp; 125 126 tmp = ureg_DECL_temporary(shader); 127 128 /* 129 * tmp.xy = dot4(m[0][0..1], m[1][0..1]) 130 * dst = tmp.x + tmp.y 131 */ 132 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); 133 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); 134 ureg_ADD(shader, dst, 135 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), 136 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 137 138 ureg_release_temporary(shader, tmp); 139} 140 141static void * 142create_mismatch_vert_shader(struct vl_idct *idct) 143{ 144 struct ureg_program *shader; 145 struct ureg_src vrect, vpos; 146 struct ureg_src scale; 147 struct ureg_dst t_tex; 148 struct ureg_dst o_vpos, o_addr[2]; 149 150 shader = ureg_create(TGSI_PROCESSOR_VERTEX); 151 if (!shader) 152 return NULL; 153 154 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 155 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 156 157 t_tex = ureg_DECL_temporary(shader); 158 159 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 160 161 o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 162 o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 163 164 /* 165 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) 166 * 167 * t_vpos = vpos + 7 / BLOCK_WIDTH 168 * o_vpos.xy = t_vpos * scale 169 * 170 * o_addr = calc_addr(...) 171 * 172 */ 173 174 scale = ureg_imm2f(shader, 175 (float)BLOCK_WIDTH / idct->buffer_width, 176 (float)BLOCK_HEIGHT / idct->buffer_height); 177 178 ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); 179 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 180 181 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); 182 calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); 183 184 ureg_release_temporary(shader, t_tex); 185 186 ureg_END(shader); 187 188 return ureg_create_shader_and_destroy(shader, idct->pipe); 189} 190 191static void * 192create_mismatch_frag_shader(struct vl_idct *idct) 193{ 194 struct ureg_program *shader; 195 196 struct ureg_src addr[2]; 197 198 struct ureg_dst m[8][2]; 199 struct ureg_dst fragment; 200 201 unsigned i; 202 203 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 204 if (!shader) 205 return NULL; 206 207 addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 208 addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 209 210 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 211 212 for (i = 0; i < 8; ++i) { 213 m[i][0] = ureg_DECL_temporary(shader); 214 m[i][1] = ureg_DECL_temporary(shader); 215 } 216 217 for (i = 0; i < 8; ++i) { 218 increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); 219 } 220 221 for (i = 0; i < 8; ++i) { 222 struct ureg_src s_addr[2] = { ureg_src(m[i][0]), ureg_src(m[i][1]) }; 223 fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); 224 } 225 226 for (i = 1; i < 8; ++i) { 227 ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); 228 ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); 229 } 230 231 ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); 232 ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); 233 234 ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); 235 ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); 236 ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); 237 238 ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), 239 ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); 240 ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), 241 ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); 242 243 ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); 244 ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); 245 246 for (i = 0; i < 8; ++i) { 247 ureg_release_temporary(shader, m[i][0]); 248 ureg_release_temporary(shader, m[i][1]); 249 } 250 251 ureg_END(shader); 252 253 return ureg_create_shader_and_destroy(shader, idct->pipe); 254} 255 256static void * 257create_stage1_vert_shader(struct vl_idct *idct) 258{ 259 struct ureg_program *shader; 260 struct ureg_src vrect, vpos; 261 struct ureg_src scale; 262 struct ureg_dst t_tex, t_start; 263 struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; 264 265 shader = ureg_create(TGSI_PROCESSOR_VERTEX); 266 if (!shader) 267 return NULL; 268 269 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 270 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 271 272 t_tex = ureg_DECL_temporary(shader); 273 t_start = ureg_DECL_temporary(shader); 274 275 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 276 277 o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 278 o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 279 280 o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); 281 o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); 282 283 /* 284 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) 285 * 286 * t_vpos = vpos + vrect 287 * o_vpos.xy = t_vpos * scale 288 * o_vpos.zw = vpos 289 * 290 * o_l_addr = calc_addr(...) 291 * o_r_addr = calc_addr(...) 292 * 293 */ 294 295 scale = ureg_imm2f(shader, 296 (float)BLOCK_WIDTH / idct->buffer_width, 297 (float)BLOCK_HEIGHT / idct->buffer_height); 298 299 ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); 300 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); 301 302 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); 303 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 304 305 ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); 306 307 calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); 308 calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); 309 310 ureg_release_temporary(shader, t_tex); 311 ureg_release_temporary(shader, t_start); 312 313 ureg_END(shader); 314 315 return ureg_create_shader_and_destroy(shader, idct->pipe); 316} 317 318static void * 319create_stage1_frag_shader(struct vl_idct *idct) 320{ 321 struct ureg_program *shader; 322 323 struct ureg_src l_addr[2], r_addr[2]; 324 325 struct ureg_dst l[4][2], r[2]; 326 struct ureg_dst fragment[idct->nr_of_render_targets]; 327 328 int i, j; 329 330 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 331 if (!shader) 332 return NULL; 333 334 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 335 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 336 337 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 338 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 339 340 for (i = 0; i < idct->nr_of_render_targets; ++i) 341 fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); 342 343 for (i = 0; i < 4; ++i) { 344 l[i][0] = ureg_DECL_temporary(shader); 345 l[i][1] = ureg_DECL_temporary(shader); 346 } 347 348 r[0] = ureg_DECL_temporary(shader); 349 r[1] = ureg_DECL_temporary(shader); 350 351 for (i = 0; i < 4; ++i) { 352 increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height); 353 } 354 355 for (i = 0; i < 4; ++i) { 356 struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) }; 357 fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false); 358 } 359 360 for (i = 0; i < idct->nr_of_render_targets; ++i) { 361 increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT); 362 363 struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) }; 364 fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false); 365 366 for (j = 0; j < 4; ++j) { 367 matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); 368 } 369 } 370 371 for (i = 0; i < 4; ++i) { 372 ureg_release_temporary(shader, l[i][0]); 373 ureg_release_temporary(shader, l[i][1]); 374 } 375 ureg_release_temporary(shader, r[0]); 376 ureg_release_temporary(shader, r[1]); 377 378 ureg_END(shader); 379 380 return ureg_create_shader_and_destroy(shader, idct->pipe); 381} 382 383void 384vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, 385 unsigned first_output, struct ureg_dst tex) 386{ 387 struct ureg_src vrect, vpos; 388 struct ureg_src scale; 389 struct ureg_dst t_start; 390 struct ureg_dst o_l_addr[2], o_r_addr[2]; 391 392 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 393 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 394 395 t_start = ureg_DECL_temporary(shader); 396 397 --first_output; 398 399 o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); 400 o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); 401 402 o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); 403 o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); 404 405 scale = ureg_imm2f(shader, 406 (float)BLOCK_WIDTH / idct->buffer_width, 407 (float)BLOCK_HEIGHT / idct->buffer_height); 408 409 ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), 410 ureg_scalar(vrect, TGSI_SWIZZLE_X), 411 ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); 412 ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); 413 414 calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); 415 calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); 416 417 ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); 418 ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); 419} 420 421void 422vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, 423 unsigned first_input, struct ureg_dst fragment) 424{ 425 struct ureg_src l_addr[2], r_addr[2]; 426 427 struct ureg_dst l[2], r[2]; 428 429 --first_input; 430 431 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 432 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 433 434 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 435 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 436 437 l[0] = ureg_DECL_temporary(shader); 438 l[1] = ureg_DECL_temporary(shader); 439 r[0] = ureg_DECL_temporary(shader); 440 r[1] = ureg_DECL_temporary(shader); 441 442 fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false); 443 fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true); 444 445 matrix_mul(shader, fragment, l, r); 446 447 ureg_release_temporary(shader, l[0]); 448 ureg_release_temporary(shader, l[1]); 449 ureg_release_temporary(shader, r[0]); 450 ureg_release_temporary(shader, r[1]); 451} 452 453static bool 454init_shaders(struct vl_idct *idct) 455{ 456 idct->vs_mismatch = create_mismatch_vert_shader(idct); 457 if (!idct->vs_mismatch) 458 goto error_vs_mismatch; 459 460 idct->fs_mismatch = create_mismatch_frag_shader(idct); 461 if (!idct->fs_mismatch) 462 goto error_fs_mismatch; 463 464 idct->vs = create_stage1_vert_shader(idct); 465 if (!idct->vs) 466 goto error_vs; 467 468 idct->fs = create_stage1_frag_shader(idct); 469 if (!idct->fs) 470 goto error_fs; 471 472 return true; 473 474error_fs: 475 idct->pipe->delete_vs_state(idct->pipe, idct->vs); 476 477error_vs: 478 idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); 479 480error_fs_mismatch: 481 idct->pipe->delete_vs_state(idct->pipe, idct->fs); 482 483error_vs_mismatch: 484 return false; 485} 486 487static void 488cleanup_shaders(struct vl_idct *idct) 489{ 490 idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); 491 idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); 492 idct->pipe->delete_vs_state(idct->pipe, idct->vs); 493 idct->pipe->delete_fs_state(idct->pipe, idct->fs); 494} 495 496static bool 497init_state(struct vl_idct *idct) 498{ 499 struct pipe_blend_state blend; 500 struct pipe_rasterizer_state rs_state; 501 struct pipe_sampler_state sampler; 502 unsigned i; 503 504 assert(idct); 505 506 memset(&rs_state, 0, sizeof(rs_state)); 507 rs_state.point_size = 1; 508 rs_state.gl_rasterization_rules = true; 509 idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); 510 if (!idct->rs_state) 511 goto error_rs_state; 512 513 memset(&blend, 0, sizeof blend); 514 515 blend.independent_blend_enable = 0; 516 blend.rt[0].blend_enable = 0; 517 blend.rt[0].rgb_func = PIPE_BLEND_ADD; 518 blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; 519 blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; 520 blend.rt[0].alpha_func = PIPE_BLEND_ADD; 521 blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; 522 blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; 523 blend.logicop_enable = 0; 524 blend.logicop_func = PIPE_LOGICOP_CLEAR; 525 /* Needed to allow color writes to FB, even if blending disabled */ 526 blend.rt[0].colormask = PIPE_MASK_RGBA; 527 blend.dither = 0; 528 idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend); 529 if (!idct->blend) 530 goto error_blend; 531 532 for (i = 0; i < 2; ++i) { 533 memset(&sampler, 0, sizeof(sampler)); 534 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 535 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 536 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 537 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 538 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 539 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 540 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 541 sampler.compare_func = PIPE_FUNC_ALWAYS; 542 sampler.normalized_coords = 1; 543 idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 544 if (!idct->samplers[i]) 545 goto error_samplers; 546 } 547 548 return true; 549 550error_samplers: 551 for (i = 0; i < 2; ++i) 552 if (idct->samplers[i]) 553 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 554 555 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 556 557error_blend: 558 idct->pipe->delete_blend_state(idct->pipe, idct->blend); 559 560error_rs_state: 561 return false; 562} 563 564static void 565cleanup_state(struct vl_idct *idct) 566{ 567 unsigned i; 568 569 for (i = 0; i < 2; ++i) 570 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 571 572 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 573 idct->pipe->delete_blend_state(idct->pipe, idct->blend); 574} 575 576static bool 577init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) 578{ 579 struct pipe_resource *tex; 580 struct pipe_surface surf_templ; 581 582 assert(idct && buffer); 583 584 tex = buffer->sampler_views.individual.source->texture; 585 586 buffer->fb_state_mismatch.width = tex->width0; 587 buffer->fb_state_mismatch.height = tex->height0; 588 buffer->fb_state_mismatch.nr_cbufs = 1; 589 590 memset(&surf_templ, 0, sizeof(surf_templ)); 591 surf_templ.format = tex->format; 592 surf_templ.u.tex.first_layer = 0; 593 surf_templ.u.tex.last_layer = 0; 594 surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; 595 buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ); 596 597 buffer->viewport_mismatch.scale[0] = tex->width0; 598 buffer->viewport_mismatch.scale[1] = tex->height0; 599 buffer->viewport_mismatch.scale[2] = 1; 600 buffer->viewport_mismatch.scale[3] = 1; 601 602 return true; 603} 604 605static void 606cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) 607{ 608 assert(idct && buffer); 609 610 pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); 611 612 pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL); 613} 614 615static bool 616init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 617{ 618 struct pipe_resource *tex; 619 struct pipe_surface surf_templ; 620 unsigned i; 621 622 assert(idct && buffer); 623 624 tex = buffer->sampler_views.individual.intermediate->texture; 625 626 buffer->fb_state.width = tex->width0; 627 buffer->fb_state.height = tex->height0; 628 buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; 629 for(i = 0; i < idct->nr_of_render_targets; ++i) { 630 memset(&surf_templ, 0, sizeof(surf_templ)); 631 surf_templ.format = tex->format; 632 surf_templ.u.tex.first_layer = i; 633 surf_templ.u.tex.last_layer = i; 634 surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; 635 buffer->fb_state.cbufs[i] = idct->pipe->create_surface( 636 idct->pipe, tex, &surf_templ); 637 638 if (!buffer->fb_state.cbufs[i]) 639 goto error_surfaces; 640 } 641 642 buffer->viewport.scale[0] = tex->width0; 643 buffer->viewport.scale[1] = tex->height0; 644 buffer->viewport.scale[2] = 1; 645 buffer->viewport.scale[3] = 1; 646 647 return true; 648 649error_surfaces: 650 for(i = 0; i < idct->nr_of_render_targets; ++i) 651 pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); 652 653 return false; 654} 655 656static void 657cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 658{ 659 unsigned i; 660 661 assert(idct && buffer); 662 663 for(i = 0; i < idct->nr_of_render_targets; ++i) 664 pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); 665 666 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); 667} 668 669struct pipe_sampler_view * 670vl_idct_upload_matrix(struct pipe_context *pipe, float scale) 671{ 672 struct pipe_resource tex_templ, *matrix; 673 struct pipe_sampler_view sv_templ, *sv; 674 struct pipe_transfer *buf_transfer; 675 unsigned i, j, pitch; 676 float *f; 677 678 struct pipe_box rect = 679 { 680 0, 0, 0, 681 BLOCK_WIDTH / 4, 682 BLOCK_HEIGHT, 683 1 684 }; 685 686 assert(pipe); 687 688 memset(&tex_templ, 0, sizeof(tex_templ)); 689 tex_templ.target = PIPE_TEXTURE_2D; 690 tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 691 tex_templ.last_level = 0; 692 tex_templ.width0 = 2; 693 tex_templ.height0 = 8; 694 tex_templ.depth0 = 1; 695 tex_templ.array_size = 1; 696 tex_templ.usage = PIPE_USAGE_IMMUTABLE; 697 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; 698 tex_templ.flags = 0; 699 700 matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); 701 if (!matrix) 702 goto error_matrix; 703 704 buf_transfer = pipe->get_transfer 705 ( 706 pipe, matrix, 707 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 708 &rect 709 ); 710 if (!buf_transfer) 711 goto error_transfer; 712 713 pitch = buf_transfer->stride / sizeof(float); 714 715 f = pipe->transfer_map(pipe, buf_transfer); 716 if (!f) 717 goto error_map; 718 719 for(i = 0; i < BLOCK_HEIGHT; ++i) 720 for(j = 0; j < BLOCK_WIDTH; ++j) 721 // transpose and scale 722 f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale; 723 724 pipe->transfer_unmap(pipe, buf_transfer); 725 pipe->transfer_destroy(pipe, buf_transfer); 726 727 memset(&sv_templ, 0, sizeof(sv_templ)); 728 u_sampler_view_default_template(&sv_templ, matrix, matrix->format); 729 sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); 730 pipe_resource_reference(&matrix, NULL); 731 if (!sv) 732 goto error_map; 733 734 return sv; 735 736error_map: 737 pipe->transfer_destroy(pipe, buf_transfer); 738 739error_transfer: 740 pipe_resource_reference(&matrix, NULL); 741 742error_matrix: 743 return NULL; 744} 745 746bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 747 unsigned buffer_width, unsigned buffer_height, 748 unsigned nr_of_render_targets, 749 struct pipe_sampler_view *matrix, 750 struct pipe_sampler_view *transpose) 751{ 752 assert(idct && pipe); 753 assert(matrix && transpose); 754 755 idct->pipe = pipe; 756 idct->buffer_width = buffer_width; 757 idct->buffer_height = buffer_height; 758 idct->nr_of_render_targets = nr_of_render_targets; 759 760 pipe_sampler_view_reference(&idct->matrix, matrix); 761 pipe_sampler_view_reference(&idct->transpose, transpose); 762 763 if(!init_shaders(idct)) 764 return false; 765 766 if(!init_state(idct)) { 767 cleanup_shaders(idct); 768 return false; 769 } 770 771 return true; 772} 773 774void 775vl_idct_cleanup(struct vl_idct *idct) 776{ 777 cleanup_shaders(idct); 778 cleanup_state(idct); 779 780 pipe_sampler_view_reference(&idct->matrix, NULL); 781 pipe_sampler_view_reference(&idct->transpose, NULL); 782} 783 784bool 785vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, 786 struct pipe_sampler_view *source, 787 struct pipe_sampler_view *intermediate) 788{ 789 assert(buffer && idct); 790 assert(source && intermediate); 791 792 memset(buffer, 0, sizeof(struct vl_idct_buffer)); 793 794 buffer->idct = idct; 795 796 pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); 797 pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); 798 pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); 799 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate); 800 801 if (!init_source(idct, buffer)) 802 return false; 803 804 if (!init_intermediate(idct, buffer)) 805 return false; 806 807 return true; 808} 809 810void 811vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer) 812{ 813 assert(buffer); 814 815 cleanup_source(buffer->idct, buffer); 816 cleanup_intermediate(buffer->idct, buffer); 817 818 pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL); 819 pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL); 820} 821 822void 823vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_instances) 824{ 825 struct vl_idct *idct; 826 assert(buffer); 827 828 idct = buffer->idct; 829 830 idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 831 idct->pipe->bind_blend_state(idct->pipe, idct->blend); 832 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); 833 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); 834 835 /* mismatch control */ 836 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch); 837 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport_mismatch); 838 idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch); 839 idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch); 840 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances); 841 842 /* first stage */ 843 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); 844 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport); 845 idct->pipe->bind_vs_state(idct->pipe, idct->vs); 846 idct->pipe->bind_fs_state(idct->pipe, idct->fs); 847 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); 848} 849 850void 851vl_idct_prepare_stage2(struct vl_idct_buffer *buffer) 852{ 853 assert(buffer); 854 855 /* second stage */ 856 buffer->idct->pipe->bind_rasterizer_state(buffer->idct->pipe, buffer->idct->rs_state); 857 buffer->idct->pipe->bind_fragment_sampler_states(buffer->idct->pipe, 2, buffer->idct->samplers); 858 buffer->idct->pipe->set_fragment_sampler_views(buffer->idct->pipe, 2, buffer->sampler_views.stage[1]); 859} 860 861