/* asm_fill.h revision 3b4c8886539b02653761f092a387c27b5c562496 */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 **************************************************************************/ 26 27#ifndef ASM_FILL_H 28#define ASM_FILL_H 29 30#include "tgsi/tgsi_ureg.h" 31 32typedef void (* ureg_func)( struct ureg_program *ureg, 33 struct ureg_dst *out, 34 struct ureg_src *in, 35 struct ureg_src *sampler, 36 struct ureg_dst *temp, 37 struct ureg_src *constant); 38 39static INLINE void 40solid_fill( struct ureg_program *ureg, 41 struct ureg_dst *out, 42 struct ureg_src *in, 43 struct ureg_src *sampler, 44 struct ureg_dst *temp, 45 struct ureg_src *constant) 46{ 47 ureg_MOV(ureg, *out, constant[2]); 48} 49 50/** 51 * Perform frag-coord-to-paint-coord transform. The transformation is in 52 * CONST[4..6]. 
53 */ 54#define PAINT_TRANSFORM \ 55 ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); \ 56 ureg_MOV(ureg, \ 57 ureg_writemask(temp[0], TGSI_WRITEMASK_Z), \ 58 ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); \ 59 ureg_DP3(ureg, temp[1], constant[4], ureg_src(temp[0])); \ 60 ureg_DP3(ureg, temp[2], constant[5], ureg_src(temp[0])); \ 61 ureg_DP3(ureg, temp[3], constant[6], ureg_src(temp[0])); \ 62 ureg_RCP(ureg, temp[3], ureg_src(temp[3])); \ 63 ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); \ 64 ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); \ 65 ureg_MOV(ureg, \ 66 ureg_writemask(temp[4], TGSI_WRITEMASK_X), \ 67 ureg_src(temp[1])); \ 68 ureg_MOV(ureg, \ 69 ureg_writemask(temp[4], TGSI_WRITEMASK_Y), \ 70 ureg_src(temp[2])); 71 72static INLINE void 73linear_grad( struct ureg_program *ureg, 74 struct ureg_dst *out, 75 struct ureg_src *in, 76 struct ureg_src *sampler, 77 struct ureg_dst *temp, 78 struct ureg_src *constant) 79{ 80 PAINT_TRANSFORM 81 82 /* grad = DP2((x, y), CONST[2].xy) * CONST[2].z */ 83 ureg_MUL(ureg, temp[0], 84 ureg_scalar(constant[2], TGSI_SWIZZLE_Y), 85 ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y)); 86 ureg_MAD(ureg, temp[1], 87 ureg_scalar(constant[2], TGSI_SWIZZLE_X), 88 ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X), 89 ureg_src(temp[0])); 90 ureg_MUL(ureg, temp[2], ureg_src(temp[1]), 91 ureg_scalar(constant[2], TGSI_SWIZZLE_Z)); 92 93 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); 94} 95 96static INLINE void 97radial_grad( struct ureg_program *ureg, 98 struct ureg_dst *out, 99 struct ureg_src *in, 100 struct ureg_src *sampler, 101 struct ureg_dst *temp, 102 struct ureg_src *constant) 103{ 104 PAINT_TRANSFORM 105 106 /* 107 * Calculate (sqrt(B^2 + AC) - B) / A, where 108 * 109 * A is CONST[2].z, 110 * B is DP2((x, y), CONST[2].xy), and 111 * C is DP2((x, y), (x, y)). 
112 */ 113 114 /* B and C */ 115 ureg_DP2(ureg, temp[0], ureg_src(temp[4]), constant[2]); 116 ureg_DP2(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[4])); 117 118 /* the square root */ 119 ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[0])); 120 ureg_MAD(ureg, temp[3], ureg_src(temp[1]), 121 ureg_scalar(constant[2], TGSI_SWIZZLE_Z), ureg_src(temp[2])); 122 ureg_RSQ(ureg, temp[3], ureg_src(temp[3])); 123 ureg_RCP(ureg, temp[3], ureg_src(temp[3])); 124 125 ureg_SUB(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[0])); 126 ureg_RCP(ureg, temp[0], ureg_scalar(constant[2], TGSI_SWIZZLE_Z)); 127 ureg_MUL(ureg, temp[0], ureg_src(temp[0]), ureg_src(temp[3])); 128 129 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[0]), sampler[0]); 130} 131 132 133static INLINE void 134pattern( struct ureg_program *ureg, 135 struct ureg_dst *out, 136 struct ureg_src *in, 137 struct ureg_src *sampler, 138 struct ureg_dst *temp, 139 struct ureg_src *constant) 140{ 141 PAINT_TRANSFORM 142 143 /* (s, t) = (x / tex_width, y / tex_height) */ 144 ureg_RCP(ureg, temp[0], 145 ureg_swizzle(constant[3], 146 TGSI_SWIZZLE_Z, 147 TGSI_SWIZZLE_W, 148 TGSI_SWIZZLE_Z, 149 TGSI_SWIZZLE_W)); 150 ureg_MOV(ureg, temp[1], ureg_src(temp[4])); 151 ureg_MUL(ureg, 152 ureg_writemask(temp[1], TGSI_WRITEMASK_X), 153 ureg_src(temp[1]), 154 ureg_src(temp[0])); 155 ureg_MUL(ureg, 156 ureg_writemask(temp[1], TGSI_WRITEMASK_Y), 157 ureg_src(temp[1]), 158 ureg_src(temp[0])); 159 160 ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]); 161} 162 163static INLINE void 164paint_degenerate( struct ureg_program *ureg, 165 struct ureg_dst *out, 166 struct ureg_src *in, 167 struct ureg_src *sampler, 168 struct ureg_dst *temp, 169 struct ureg_src *constant) 170{ 171 /* CONST[3].y is 1.0f */ 172 ureg_MOV(ureg, temp[1], ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); 173 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[1]), sampler[0]); 174} 175 176static INLINE void 177color_transform( struct 
ureg_program *ureg, 178 struct ureg_dst *out, 179 struct ureg_src *in, 180 struct ureg_src *sampler, 181 struct ureg_dst *temp, 182 struct ureg_src *constant) 183{ 184 ureg_MAD(ureg, temp[1], ureg_src(temp[0]), constant[0], constant[1]); 185 /* clamp to [0.0f, 1.0f] */ 186 ureg_CLAMP(ureg, temp[1], 187 ureg_src(temp[1]), 188 ureg_scalar(constant[3], TGSI_SWIZZLE_X), 189 ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); 190 ureg_MOV(ureg, *out, ureg_src(temp[1])); 191} 192 193static INLINE void 194mask( struct ureg_program *ureg, 195 struct ureg_dst *out, 196 struct ureg_src *in, 197 struct ureg_src *sampler, 198 struct ureg_dst *temp, 199 struct ureg_src *constant) 200{ 201 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]); 202 ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W), 203 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 204 ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); 205 ureg_MOV(ureg, *out, ureg_src(temp[0])); 206} 207 208static INLINE void 209image_normal( struct ureg_program *ureg, 210 struct ureg_dst *out, 211 struct ureg_src *in, 212 struct ureg_src *sampler, 213 struct ureg_dst *temp, 214 struct ureg_src *constant) 215{ 216 ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]); 217} 218 219 220static INLINE void 221image_multiply( struct ureg_program *ureg, 222 struct ureg_dst *out, 223 struct ureg_src *in, 224 struct ureg_src *sampler, 225 struct ureg_dst *temp, 226 struct ureg_src *constant) 227{ 228 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); 229 ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); 230} 231 232 233static INLINE void 234image_stencil( struct ureg_program *ureg, 235 struct ureg_dst *out, 236 struct ureg_src *in, 237 struct ureg_src *sampler, 238 struct ureg_dst *temp, 239 struct ureg_src *constant) 240{ 241 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); 242 ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); 243} 244 245/** 246 * Emit instructions for the specified 
blend mode. Colors should be 247 * premultiplied. Two temporary registers are required. 248 * 249 * XXX callers do not pass premultiplied colors! 250 */ 251static INLINE void 252blend_generic(struct ureg_program *ureg, 253 VGBlendMode mode, 254 struct ureg_dst out, 255 struct ureg_src src, 256 struct ureg_src dst, 257 struct ureg_src src_channel_alpha, 258 struct ureg_src one, 259 struct ureg_dst temp[2]) 260{ 261 switch (mode) { 262 case VG_BLEND_SRC: 263 ureg_MOV(ureg, out, src); 264 break; 265 case VG_BLEND_SRC_OVER: 266 /* RGBA_out = RGBA_src + (1 - A_src) * RGBA_dst */ 267 ureg_SUB(ureg, temp[0], one, src_channel_alpha); 268 ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src); 269 break; 270 case VG_BLEND_DST_OVER: 271 /* RGBA_out = RGBA_dst + (1 - A_dst) * RGBA_src */ 272 ureg_SUB(ureg, temp[0], one, ureg_scalar(dst, TGSI_SWIZZLE_W)); 273 ureg_MAD(ureg, out, ureg_src(temp[0]), src, dst); 274 break; 275 case VG_BLEND_SRC_IN: 276 ureg_MUL(ureg, out, src, ureg_scalar(dst, TGSI_SWIZZLE_W)); 277 break; 278 case VG_BLEND_DST_IN: 279 ureg_MUL(ureg, out, dst, src_channel_alpha); 280 break; 281 case VG_BLEND_MULTIPLY: 282 /* 283 * RGB_out = (1 - A_dst) * RGB_src + (1 - A_src) * RGB_dst + 284 * RGB_src * RGB_dst 285 */ 286 ureg_MAD(ureg, temp[0], 287 ureg_scalar(dst, TGSI_SWIZZLE_W), ureg_negate(src), src); 288 ureg_MAD(ureg, temp[1], 289 src_channel_alpha, ureg_negate(dst), dst); 290 ureg_MAD(ureg, temp[1], src, dst, ureg_src(temp[1])); 291 ureg_ADD(ureg, out, ureg_src(temp[0]), ureg_src(temp[1])); 292 /* alpha is src over */ 293 ureg_ADD(ureg, ureg_writemask(out, TGSI_WRITEMASK_W), 294 src, ureg_src(temp[1])); 295 break; 296 case VG_BLEND_SCREEN: 297 /* RGBA_out = RGBA_src + (1 - RGBA_src) * RGBA_dst */ 298 ureg_SUB(ureg, temp[0], one, src); 299 ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src); 300 break; 301 case VG_BLEND_DARKEN: 302 case VG_BLEND_LIGHTEN: 303 /* src over */ 304 ureg_SUB(ureg, temp[0], one, src_channel_alpha); 305 ureg_MAD(ureg, temp[0], 
ureg_src(temp[0]), dst, src); 306 /* dst over */ 307 ureg_SUB(ureg, temp[1], one, ureg_scalar(dst, TGSI_SWIZZLE_W)); 308 ureg_MAD(ureg, temp[1], ureg_src(temp[1]), src, dst); 309 /* take min/max for colors */ 310 if (mode == VG_BLEND_DARKEN) { 311 ureg_MIN(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ), 312 ureg_src(temp[0]), ureg_src(temp[1])); 313 } 314 else { 315 ureg_MAX(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ), 316 ureg_src(temp[0]), ureg_src(temp[1])); 317 } 318 break; 319 case VG_BLEND_ADDITIVE: 320 /* RGBA_out = RGBA_src + RGBA_dst */ 321 ureg_ADD(ureg, temp[0], src, dst); 322 ureg_MIN(ureg, out, ureg_src(temp[0]), one); 323 break; 324 default: 325 assert(0); 326 break; 327 } 328} 329 330static INLINE void 331blend_multiply( struct ureg_program *ureg, 332 struct ureg_dst *out, 333 struct ureg_src *in, 334 struct ureg_src *sampler, 335 struct ureg_dst *temp, 336 struct ureg_src *constant) 337{ 338 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 339 blend_generic(ureg, VG_BLEND_MULTIPLY, *out, 340 ureg_src(temp[0]), 341 ureg_src(temp[1]), 342 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 343 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 344 temp + 2); 345} 346 347static INLINE void 348blend_screen( struct ureg_program *ureg, 349 struct ureg_dst *out, 350 struct ureg_src *in, 351 struct ureg_src *sampler, 352 struct ureg_dst *temp, 353 struct ureg_src *constant) 354{ 355 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 356 blend_generic(ureg, VG_BLEND_SCREEN, *out, 357 ureg_src(temp[0]), 358 ureg_src(temp[1]), 359 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 360 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 361 temp + 2); 362} 363 364static INLINE void 365blend_darken( struct ureg_program *ureg, 366 struct ureg_dst *out, 367 struct ureg_src *in, 368 struct ureg_src *sampler, 369 struct ureg_dst *temp, 370 struct ureg_src *constant) 371{ 372 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 373 blend_generic(ureg, 
VG_BLEND_DARKEN, *out, 374 ureg_src(temp[0]), 375 ureg_src(temp[1]), 376 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 377 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 378 temp + 2); 379} 380 381static INLINE void 382blend_lighten( struct ureg_program *ureg, 383 struct ureg_dst *out, 384 struct ureg_src *in, 385 struct ureg_src *sampler, 386 struct ureg_dst *temp, 387 struct ureg_src *constant) 388{ 389 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 390 blend_generic(ureg, VG_BLEND_LIGHTEN, *out, 391 ureg_src(temp[0]), 392 ureg_src(temp[1]), 393 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 394 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 395 temp + 2); 396} 397 398static INLINE void 399premultiply( struct ureg_program *ureg, 400 struct ureg_dst *out, 401 struct ureg_src *in, 402 struct ureg_src *sampler, 403 struct ureg_dst *temp, 404 struct ureg_src *constant) 405{ 406 ureg_MUL(ureg, 407 ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), 408 ureg_src(temp[0]), 409 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); 410} 411 412static INLINE void 413unpremultiply( struct ureg_program *ureg, 414 struct ureg_dst *out, 415 struct ureg_src *in, 416 struct ureg_src *sampler, 417 struct ureg_dst *temp, 418 struct ureg_src *constant) 419{ 420 ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]); 421} 422 423 424static INLINE void 425color_bw( struct ureg_program *ureg, 426 struct ureg_dst *out, 427 struct ureg_src *in, 428 struct ureg_src *sampler, 429 struct ureg_dst *temp, 430 struct ureg_src *constant) 431{ 432 ureg_ADD(ureg, temp[1], 433 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 434 ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); 435 ureg_RCP(ureg, temp[2], ureg_src(temp[1])); 436 ureg_ADD(ureg, temp[1], 437 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 438 ureg_src(temp[2])); 439 ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), 440 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X), 441 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y)); 442 ureg_ADD(ureg, 
ureg_writemask(temp[2], TGSI_WRITEMASK_X), 443 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z), 444 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X)); 445 ureg_SGE(ureg, 446 ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), 447 ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X), 448 ureg_src(temp[1])); 449 ureg_SGE(ureg, 450 ureg_writemask(temp[0], TGSI_WRITEMASK_W), 451 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 452 ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y)); 453 ureg_MOV(ureg, *out, ureg_src(temp[0])); 454} 455 456 457struct shader_asm_info { 458 VGint id; 459 ureg_func func; 460 461 VGboolean needs_position; 462 463 VGint start_const; 464 VGint num_consts; 465 466 VGint start_sampler; 467 VGint num_samplers; 468 469 VGint start_temp; 470 VGint num_temps; 471}; 472 473 474/* paint types */ 475static const struct shader_asm_info shaders_paint_asm[] = { 476 {VEGA_SOLID_FILL_SHADER, solid_fill, 477 VG_FALSE, 2, 1, 0, 0, 0, 0}, 478 {VEGA_LINEAR_GRADIENT_SHADER, linear_grad, 479 VG_TRUE, 2, 5, 0, 1, 0, 5}, 480 {VEGA_RADIAL_GRADIENT_SHADER, radial_grad, 481 VG_TRUE, 2, 5, 0, 1, 0, 5}, 482 {VEGA_PATTERN_SHADER, pattern, 483 VG_TRUE, 3, 4, 0, 1, 0, 5}, 484 {VEGA_PAINT_DEGENERATE_SHADER, paint_degenerate, 485 VG_FALSE, 3, 1, 0, 1, 0, 2} 486}; 487 488/* image draw modes */ 489static const struct shader_asm_info shaders_image_asm[] = { 490 {VEGA_IMAGE_NORMAL_SHADER, image_normal, 491 VG_TRUE, 0, 0, 3, 1, 0, 0}, 492 {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply, 493 VG_TRUE, 0, 0, 3, 1, 0, 2}, 494 {VEGA_IMAGE_STENCIL_SHADER, image_stencil, 495 VG_TRUE, 0, 0, 3, 1, 0, 2} 496}; 497 498static const struct shader_asm_info shaders_color_transform_asm[] = { 499 {VEGA_COLOR_TRANSFORM_SHADER, color_transform, 500 VG_FALSE, 0, 4, 0, 0, 0, 2} 501}; 502 503static const struct shader_asm_info shaders_mask_asm[] = { 504 {VEGA_MASK_SHADER, mask, 505 VG_TRUE, 0, 0, 1, 1, 0, 2} 506}; 507 508/* extra blend modes */ 509static const struct shader_asm_info shaders_blend_asm[] = { 510 
{VEGA_BLEND_MULTIPLY_SHADER, blend_multiply, 511 VG_TRUE, 3, 1, 2, 1, 0, 4}, 512 {VEGA_BLEND_SCREEN_SHADER, blend_screen, 513 VG_TRUE, 3, 1, 2, 1, 0, 4}, 514 {VEGA_BLEND_DARKEN_SHADER, blend_darken, 515 VG_TRUE, 3, 1, 2, 1, 0, 4}, 516 {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten, 517 VG_TRUE, 3, 1, 2, 1, 0, 4}, 518}; 519 520/* premultiply */ 521static const struct shader_asm_info shaders_premultiply_asm[] = { 522 {VEGA_PREMULTIPLY_SHADER, premultiply, 523 VG_FALSE, 0, 0, 0, 0, 0, 1}, 524 {VEGA_UNPREMULTIPLY_SHADER, unpremultiply, 525 VG_FALSE, 0, 0, 0, 0, 0, 1}, 526}; 527 528/* color transform to black and white */ 529static const struct shader_asm_info shaders_bw_asm[] = { 530 {VEGA_BW_SHADER, color_bw, 531 VG_FALSE, 3, 1, 0, 0, 0, 3}, 532}; 533 534#endif 535