vl_zscan.c revision 6ad846ee78d9d8ba93dcecdefbf89f2b981333ef
1/************************************************************************** 2 * 3 * Copyright 2011 Christian König 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <assert.h> 29 30#include <pipe/p_screen.h> 31#include <pipe/p_context.h> 32 33#include <util/u_draw.h> 34#include <util/u_sampler.h> 35#include <util/u_inlines.h> 36 37#include <tgsi/tgsi_ureg.h> 38 39#include <vl/vl_defines.h> 40#include <vl/vl_types.h> 41 42#include "vl_zscan.h" 43#include "vl_vertex_buffers.h" 44 45enum VS_OUTPUT 46{ 47 VS_O_VPOS, 48 VS_O_VTEX 49}; 50 51const int vl_zscan_linear[] = 52{ 53 /* Linear scan pattern */ 54 0, 1, 2, 3, 4, 5, 6, 7, 55 8, 9,10,11,12,13,14,15, 56 16,17,18,19,20,21,22,23, 57 24,25,26,27,28,29,30,31, 58 32,33,34,35,36,37,38,39, 59 40,41,42,43,44,45,46,47, 60 48,49,50,51,52,53,54,55, 61 56,57,58,59,60,61,62,63 62}; 63 64const int vl_zscan_normal[] = 65{ 66 /* Zig-Zag scan pattern */ 67 0, 1, 8,16, 9, 2, 3,10, 68 17,24,32,25,18,11, 4, 5, 69 12,19,26,33,40,48,41,34, 70 27,20,13, 6, 7,14,21,28, 71 35,42,49,56,57,50,43,36, 72 29,22,15,23,30,37,44,51, 73 58,59,52,45,38,31,39,46, 74 53,60,61,54,47,55,62,63 75}; 76 77const int vl_zscan_alternate[] = 78{ 79 /* Alternate scan pattern */ 80 0, 8,16,24, 1, 9, 2,10, 81 17,25,32,40,48,56,57,49, 82 41,33,26,18, 3,11, 4,12, 83 19,27,34,42,50,58,35,43, 84 51,59,20,28, 5,13, 6,14, 85 21,29,36,44,52,60,37,45, 86 53,61,22,30, 7,15,23,31, 87 38,46,54,62,39,47,55,63 88}; 89 90static void * 91create_vert_shader(struct vl_zscan *zscan) 92{ 93 struct ureg_program *shader; 94 95 struct ureg_src scale, instance; 96 struct ureg_src vrect, vpos; 97 98 struct ureg_dst tmp; 99 struct ureg_dst o_vpos, o_vtex[zscan->num_channels]; 100 101 unsigned i; 102 103 shader = ureg_create(TGSI_PROCESSOR_VERTEX); 104 if (!shader) 105 return NULL; 106 107 scale = ureg_imm2f(shader, 108 (float)BLOCK_WIDTH / zscan->buffer_width, 109 (float)BLOCK_HEIGHT / zscan->buffer_height); 110 111 instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0); 112 113 vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 114 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 115 116 tmp = ureg_DECL_temporary(shader); 117 118 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 119 120 for (i = 0; i < zscan->num_channels; ++i) 121 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); 122 123 /* 124 * o_vpos.xy = (vpos + vrect) * scale 125 * o_vpos.zw = 1.0f 126 * 127 * tmp.xy = InstanceID / blocks_per_line 128 * tmp.x = frac(tmp.x) 129 * tmp.y = floor(tmp.y) 130 * 131 * o_vtex.x = vrect.x / blocks_per_line + tmp.x 132 * o_vtex.y = vrect.y 133 * o_vtex.z = tmp.z * blocks_per_line / blocks_total 134 */ 135 ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); 136 ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); 137 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 138 139 ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance, 140 ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); 141 142 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp)); 143 ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp)); 144 145 for (i = 0; i < zscan->num_channels; ++i) { 146 if (i > 0) 147 ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp), 148 ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH))); 149 150 ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, 151 ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); 152 ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); 153 ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp), 154 ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); 155 } 156 157 ureg_release_temporary(shader, tmp); 158 ureg_END(shader); 159 160 return ureg_create_shader_and_destroy(shader, zscan->pipe); 161} 162 163static void * 164create_frag_shader(struct vl_zscan *zscan) 165{ 166 struct ureg_program *shader; 167 struct ureg_src vtex[zscan->num_channels]; 168 169 struct ureg_src src, scan, quant; 170 171 struct ureg_dst tmp[zscan->num_channels]; 172 struct ureg_dst fragment; 173 174 unsigned i; 175 176 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); 177 if (!shader) 178 return NULL; 179 180 for (i = 0; i < zscan->num_channels; ++i) 181 vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); 182 183 src = ureg_DECL_sampler(shader, 0); 184 scan = ureg_DECL_sampler(shader, 1); 185 quant = ureg_DECL_sampler(shader, 2); 186 187 for (i = 0; i < zscan->num_channels; ++i) 188 tmp[i] = ureg_DECL_temporary(shader); 189 190 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 191 192 /* 193 * tmp.x = tex(vtex, 1) 194 * tmp.y = vtex.z 195 * fragment = tex(tmp, 0) * quant 196 */ 197 for (i = 0; i < zscan->num_channels; ++i) 198 ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan); 199 200 for (i = 0; i < zscan->num_channels; ++i) 201 ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z)); 202 203 for (i = 0; i < zscan->num_channels; ++i) 204 ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src); 205 206 // TODO: Fetch quant and use it 207 for (i = 0; i < zscan->num_channels; ++i) 208 ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f)); 209 210 for (i = 0; i < zscan->num_channels; ++i) 211 ureg_release_temporary(shader, tmp[i]); 212 ureg_END(shader); 213 214 return ureg_create_shader_and_destroy(shader, zscan->pipe); 215} 216 217static bool 218init_shaders(struct vl_zscan *zscan) 219{ 220 assert(zscan); 221 222 zscan->vs = create_vert_shader(zscan); 223 if (!zscan->vs) 224 goto error_vs; 225 226 zscan->fs = create_frag_shader(zscan); 227 if (!zscan->fs) 228 goto error_fs; 229 230 return true; 231 232error_fs: 233 zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); 234 235error_vs: 236 return false; 237} 238 239static void 240cleanup_shaders(struct vl_zscan *zscan) 241{ 242 assert(zscan); 243 244 zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); 245 zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs); 246} 247 248static bool 249init_state(struct vl_zscan *zscan) 250{ 251 struct pipe_blend_state blend; 252 struct pipe_rasterizer_state rs_state; 253 struct pipe_sampler_state sampler; 254 unsigned i; 255 256 assert(zscan); 257 258 memset(&rs_state, 0, sizeof(rs_state)); 259 rs_state.gl_rasterization_rules = false; 260 zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state); 261 if (!zscan->rs_state) 262 goto error_rs_state; 263 264 memset(&blend, 0, sizeof blend); 265 266 blend.independent_blend_enable = 0; 267 blend.rt[0].blend_enable = 0; 268 blend.rt[0].rgb_func = PIPE_BLEND_ADD; 269 blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; 270 blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; 271 blend.rt[0].alpha_func = PIPE_BLEND_ADD; 272 blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; 273 blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; 274 blend.logicop_enable = 0; 275 blend.logicop_func = PIPE_LOGICOP_CLEAR; 276 /* Needed to allow color writes to FB, even if blending disabled */ 277 blend.rt[0].colormask = PIPE_MASK_RGBA; 278 blend.dither = 0; 279 zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend); 280 if (!zscan->blend) 281 goto error_blend; 282 283 for (i = 0; i < 3; ++i) { 284 memset(&sampler, 0, sizeof(sampler)); 285 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 286 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 287 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 288 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 289 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 290 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 291 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 292 sampler.compare_func = PIPE_FUNC_ALWAYS; 293 sampler.normalized_coords = 1; 294 zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler); 295 if (!zscan->samplers[i]) 296 goto error_samplers; 297 } 298 299 return true; 300 301error_samplers: 302 for (i = 0; i < 2; ++i) 303 if (zscan->samplers[i]) 304 zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); 305 306 zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); 307 308error_blend: 309 zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); 310 311error_rs_state: 312 return false; 313} 314 315static void 316cleanup_state(struct vl_zscan *zscan) 317{ 318 unsigned i; 319 320 assert(zscan); 321 322 for (i = 0; i < 3; ++i) 323 zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); 324 325 zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); 326 zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); 327} 328 329struct pipe_sampler_view * 330vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line) 331{ 332 const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; 333 334 int patched_layout[64]; 335 336 struct pipe_resource res_tmpl, *res; 337 struct pipe_sampler_view sv_tmpl, *sv; 338 struct pipe_transfer *buf_transfer; 339 unsigned x, y, i, pitch; 340 float *f; 341 342 struct pipe_box rect = 343 { 344 0, 0, 0, 345 BLOCK_WIDTH * blocks_per_line, 346 BLOCK_HEIGHT, 347 1 348 }; 349 350 assert(pipe && layout && blocks_per_line); 351 352 for (i = 0; i < 64; ++i) 353 patched_layout[layout[i]] = i; 354 355 memset(&res_tmpl, 0, sizeof(res_tmpl)); 356 res_tmpl.target = PIPE_TEXTURE_2D; 357 res_tmpl.format = PIPE_FORMAT_R32_FLOAT; 358 res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line; 359 res_tmpl.height0 = BLOCK_HEIGHT; 360 res_tmpl.depth0 = 1; 361 res_tmpl.array_size = 1; 362 res_tmpl.usage = PIPE_USAGE_IMMUTABLE; 363 res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; 364 365 res = pipe->screen->resource_create(pipe->screen, &res_tmpl); 366 if (!res) 367 goto error_resource; 368 369 buf_transfer = pipe->get_transfer 370 ( 371 pipe, res, 372 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, 373 &rect 374 ); 375 if (!buf_transfer) 376 goto error_transfer; 377 378 pitch = buf_transfer->stride / sizeof(float); 379 380 f = pipe->transfer_map(pipe, buf_transfer); 381 if (!f) 382 goto error_map; 383 384 for (i = 0; i < blocks_per_line; ++i) 385 for (y = 0; y < BLOCK_HEIGHT; ++y) 386 for (x = 0; x < BLOCK_WIDTH; ++x) { 387 float addr = patched_layout[x + y * BLOCK_WIDTH] + 388 i * BLOCK_WIDTH * BLOCK_HEIGHT; 389 390 addr /= total_size; 391 392 f[i * BLOCK_WIDTH + y * pitch + x] = addr; 393 } 394 395 pipe->transfer_unmap(pipe, buf_transfer); 396 pipe->transfer_destroy(pipe, buf_transfer); 397 398 memset(&sv_tmpl, 0, sizeof(sv_tmpl)); 399 u_sampler_view_default_template(&sv_tmpl, res, res->format); 400 sv = pipe->create_sampler_view(pipe, res, &sv_tmpl); 401 pipe_resource_reference(&res, NULL); 402 if (!sv) 403 goto error_map; 404 405 return sv; 406 407error_map: 408 pipe->transfer_destroy(pipe, buf_transfer); 409 410error_transfer: 411 pipe_resource_reference(&res, NULL); 412 413error_resource: 414 return NULL; 415} 416 417#if 0 418// TODO 419struct pipe_sampler_view * 420vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line); 421 422struct pipe_sampler_view * 423vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line); 424#endif 425 426bool 427vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, 428 unsigned buffer_width, unsigned buffer_height, 429 unsigned blocks_per_line, unsigned blocks_total, 430 unsigned num_channels) 431{ 432 assert(zscan && pipe); 433 434 zscan->pipe = pipe; 435 zscan->buffer_width = buffer_width; 436 zscan->buffer_height = buffer_height; 437 zscan->num_channels = num_channels; 438 zscan->blocks_per_line = blocks_per_line; 439 zscan->blocks_total = blocks_total; 440 441 if(!init_shaders(zscan)) 442 return false; 443 444 if(!init_state(zscan)) { 445 cleanup_shaders(zscan); 446 return false; 447 } 448 449 return true; 450} 451 452void 453vl_zscan_cleanup(struct vl_zscan *zscan) 454{ 455 assert(zscan); 456 457 cleanup_shaders(zscan); 458 cleanup_state(zscan); 459} 460 461#if 0 462// TODO 463void 464vl_zscan_upload_quant(struct vl_zscan *zscan, ...); 465#endif 466 467bool 468vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, 469 struct pipe_sampler_view *src, struct pipe_surface *dst) 470{ 471 assert(zscan && buffer); 472 473 memset(buffer, 0, sizeof(struct vl_zscan_buffer)); 474 475 buffer->zscan = zscan; 476 477 pipe_sampler_view_reference(&buffer->src, src); 478 479 buffer->viewport.scale[0] = dst->width; 480 buffer->viewport.scale[1] = dst->height; 481 buffer->viewport.scale[2] = 1; 482 buffer->viewport.scale[3] = 1; 483 buffer->viewport.translate[0] = 0; 484 buffer->viewport.translate[1] = 0; 485 buffer->viewport.translate[2] = 0; 486 buffer->viewport.translate[3] = 0; 487 488 buffer->fb_state.width = dst->width; 489 buffer->fb_state.height = dst->height; 490 buffer->fb_state.nr_cbufs = 1; 491 pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); 492 493 return true; 494} 495 496void 497vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer) 498{ 499 assert(buffer); 500 501 pipe_sampler_view_reference(&buffer->src, NULL); 502 pipe_sampler_view_reference(&buffer->layout, NULL); 503 pipe_sampler_view_reference(&buffer->quant, NULL); 504 pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL); 505} 506 507void 508vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout) 509{ 510 assert(buffer); 511 assert(layout); 512 513 pipe_sampler_view_reference(&buffer->layout, layout); 514} 515 516void 517vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances) 518{ 519 struct vl_zscan *zscan; 520 521 assert(buffer); 522 523 zscan = buffer->zscan; 524 525 zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); 526 zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); 527 zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers); 528 zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); 529 zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport); 530 zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src); 531 zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); 532 zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); 533 util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); 534} 535