/* asm_fill.h revision a19eaaa6c1956add5343295af7e9f682efa08d74 */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 **************************************************************************/ 26 27#ifndef ASM_FILL_H 28#define ASM_FILL_H 29 30#include "tgsi/tgsi_ureg.h" 31 32typedef void (* ureg_func)( struct ureg_program *ureg, 33 struct ureg_dst *out, 34 struct ureg_src *in, 35 struct ureg_src *sampler, 36 struct ureg_dst *temp, 37 struct ureg_src *constant); 38 39static INLINE void 40solid_fill( struct ureg_program *ureg, 41 struct ureg_dst *out, 42 struct ureg_src *in, 43 struct ureg_src *sampler, 44 struct ureg_dst *temp, 45 struct ureg_src *constant) 46{ 47 ureg_MOV(ureg, *out, constant[2]); 48} 49 50/** 51 * Perform frag-coord-to-paint-coord transform. The transformation is in 52 * CONST[4..6]. 
53 */ 54#define PAINT_TRANSFORM \ 55 ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); \ 56 ureg_MOV(ureg, \ 57 ureg_writemask(temp[0], TGSI_WRITEMASK_Z), \ 58 ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); \ 59 ureg_DP3(ureg, temp[1], constant[4], ureg_src(temp[0])); \ 60 ureg_DP3(ureg, temp[2], constant[5], ureg_src(temp[0])); \ 61 ureg_DP3(ureg, temp[3], constant[6], ureg_src(temp[0])); \ 62 ureg_RCP(ureg, temp[3], ureg_src(temp[3])); \ 63 ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); \ 64 ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); \ 65 ureg_MOV(ureg, \ 66 ureg_writemask(temp[4], TGSI_WRITEMASK_X), \ 67 ureg_src(temp[1])); \ 68 ureg_MOV(ureg, \ 69 ureg_writemask(temp[4], TGSI_WRITEMASK_Y), \ 70 ureg_src(temp[2])); 71 72static INLINE void 73linear_grad( struct ureg_program *ureg, 74 struct ureg_dst *out, 75 struct ureg_src *in, 76 struct ureg_src *sampler, 77 struct ureg_dst *temp, 78 struct ureg_src *constant) 79{ 80 PAINT_TRANSFORM 81 82 /* grad = DP2((x, y), CONST[2].xy) * CONST[2].z */ 83 ureg_MUL(ureg, temp[0], 84 ureg_scalar(constant[2], TGSI_SWIZZLE_Y), 85 ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y)); 86 ureg_MAD(ureg, temp[1], 87 ureg_scalar(constant[2], TGSI_SWIZZLE_X), 88 ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X), 89 ureg_src(temp[0])); 90 ureg_MUL(ureg, temp[2], ureg_src(temp[1]), 91 ureg_scalar(constant[2], TGSI_SWIZZLE_Z)); 92 93 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); 94} 95 96static INLINE void 97radial_grad( struct ureg_program *ureg, 98 struct ureg_dst *out, 99 struct ureg_src *in, 100 struct ureg_src *sampler, 101 struct ureg_dst *temp, 102 struct ureg_src *constant) 103{ 104 PAINT_TRANSFORM 105 106 /* 107 * Calculate (sqrt(B^2 + AC) - B) / A, where 108 * 109 * A is CONST[2].z, 110 * B is DP2((x, y), CONST[2].xy), and 111 * C is DP2((x, y), (x, y)). 
112 */ 113 114 /* B and C */ 115 ureg_DP2(ureg, temp[0], ureg_src(temp[4]), constant[2]); 116 ureg_DP2(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[4])); 117 118 /* the square root */ 119 ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[0])); 120 ureg_MAD(ureg, temp[3], ureg_src(temp[1]), 121 ureg_scalar(constant[2], TGSI_SWIZZLE_Z), ureg_src(temp[2])); 122 ureg_RSQ(ureg, temp[3], ureg_src(temp[3])); 123 ureg_RCP(ureg, temp[3], ureg_src(temp[3])); 124 125 ureg_SUB(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[0])); 126 ureg_RCP(ureg, temp[0], ureg_scalar(constant[2], TGSI_SWIZZLE_Z)); 127 ureg_MUL(ureg, temp[0], ureg_src(temp[0]), ureg_src(temp[3])); 128 129 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[0]), sampler[0]); 130} 131 132 133static INLINE void 134pattern( struct ureg_program *ureg, 135 struct ureg_dst *out, 136 struct ureg_src *in, 137 struct ureg_src *sampler, 138 struct ureg_dst *temp, 139 struct ureg_src *constant) 140{ 141 PAINT_TRANSFORM 142 143 /* (s, t) = (x / tex_width, y / tex_height) */ 144 ureg_RCP(ureg, temp[0], 145 ureg_swizzle(constant[3], 146 TGSI_SWIZZLE_Z, 147 TGSI_SWIZZLE_W, 148 TGSI_SWIZZLE_Z, 149 TGSI_SWIZZLE_W)); 150 ureg_MOV(ureg, temp[1], ureg_src(temp[4])); 151 ureg_MUL(ureg, 152 ureg_writemask(temp[1], TGSI_WRITEMASK_X), 153 ureg_src(temp[1]), 154 ureg_src(temp[0])); 155 ureg_MUL(ureg, 156 ureg_writemask(temp[1], TGSI_WRITEMASK_Y), 157 ureg_src(temp[1]), 158 ureg_src(temp[0])); 159 160 ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]); 161} 162 163static INLINE void 164paint_degenerate( struct ureg_program *ureg, 165 struct ureg_dst *out, 166 struct ureg_src *in, 167 struct ureg_src *sampler, 168 struct ureg_dst *temp, 169 struct ureg_src *constant) 170{ 171 /* CONST[3].y is 1.0f */ 172 ureg_MOV(ureg, temp[1], ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); 173 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[1]), sampler[0]); 174} 175 176static INLINE void 177image_normal( struct ureg_program 
*ureg, 178 struct ureg_dst *out, 179 struct ureg_src *in, 180 struct ureg_src *sampler, 181 struct ureg_dst *temp, 182 struct ureg_src *constant) 183{ 184 ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]); 185} 186 187 188static INLINE void 189image_multiply( struct ureg_program *ureg, 190 struct ureg_dst *out, 191 struct ureg_src *in, 192 struct ureg_src *sampler, 193 struct ureg_dst *temp, 194 struct ureg_src *constant) 195{ 196 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); 197 ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); 198} 199 200 201static INLINE void 202image_stencil( struct ureg_program *ureg, 203 struct ureg_dst *out, 204 struct ureg_src *in, 205 struct ureg_src *sampler, 206 struct ureg_dst *temp, 207 struct ureg_src *constant) 208{ 209 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); 210 ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); 211} 212 213static INLINE void 214color_transform( struct ureg_program *ureg, 215 struct ureg_dst *out, 216 struct ureg_src *in, 217 struct ureg_src *sampler, 218 struct ureg_dst *temp, 219 struct ureg_src *constant) 220{ 221 ureg_MAD(ureg, temp[1], ureg_src(temp[0]), constant[0], constant[1]); 222 /* clamp to [0.0f, 1.0f] */ 223 ureg_CLAMP(ureg, temp[1], 224 ureg_src(temp[1]), 225 ureg_scalar(constant[3], TGSI_SWIZZLE_X), 226 ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); 227 ureg_MOV(ureg, *out, ureg_src(temp[1])); 228} 229 230/** 231 * Emit instructions for the specified blend mode. Colors should be 232 * premultiplied. Two temporary registers are required. 233 * 234 * XXX callers do not pass premultiplied colors! 
235 */ 236static INLINE void 237blend_generic(struct ureg_program *ureg, 238 VGBlendMode mode, 239 struct ureg_dst out, 240 struct ureg_src src, 241 struct ureg_src dst, 242 struct ureg_src src_channel_alpha, 243 struct ureg_src one, 244 struct ureg_dst temp[2]) 245{ 246 switch (mode) { 247 case VG_BLEND_SRC: 248 ureg_MOV(ureg, out, src); 249 break; 250 case VG_BLEND_SRC_OVER: 251 /* RGBA_out = RGBA_src + (1 - A_src) * RGBA_dst */ 252 ureg_SUB(ureg, temp[0], one, src_channel_alpha); 253 ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src); 254 break; 255 case VG_BLEND_DST_OVER: 256 /* RGBA_out = RGBA_dst + (1 - A_dst) * RGBA_src */ 257 ureg_SUB(ureg, temp[0], one, ureg_scalar(dst, TGSI_SWIZZLE_W)); 258 ureg_MAD(ureg, out, ureg_src(temp[0]), src, dst); 259 break; 260 case VG_BLEND_SRC_IN: 261 ureg_MUL(ureg, out, src, ureg_scalar(dst, TGSI_SWIZZLE_W)); 262 break; 263 case VG_BLEND_DST_IN: 264 ureg_MUL(ureg, out, dst, src_channel_alpha); 265 break; 266 case VG_BLEND_MULTIPLY: 267 /* 268 * RGB_out = (1 - A_dst) * RGB_src + (1 - A_src) * RGB_dst + 269 * RGB_src * RGB_dst 270 */ 271 ureg_MAD(ureg, temp[0], 272 ureg_scalar(dst, TGSI_SWIZZLE_W), ureg_negate(src), src); 273 ureg_MAD(ureg, temp[1], 274 src_channel_alpha, ureg_negate(dst), dst); 275 ureg_MAD(ureg, temp[1], src, dst, ureg_src(temp[1])); 276 ureg_ADD(ureg, out, ureg_src(temp[0]), ureg_src(temp[1])); 277 /* alpha is src over */ 278 ureg_ADD(ureg, ureg_writemask(out, TGSI_WRITEMASK_W), 279 src, ureg_src(temp[1])); 280 break; 281 case VG_BLEND_SCREEN: 282 /* RGBA_out = RGBA_src + (1 - RGBA_src) * RGBA_dst */ 283 ureg_SUB(ureg, temp[0], one, src); 284 ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src); 285 break; 286 case VG_BLEND_DARKEN: 287 case VG_BLEND_LIGHTEN: 288 /* src over */ 289 ureg_SUB(ureg, temp[0], one, src_channel_alpha); 290 ureg_MAD(ureg, temp[0], ureg_src(temp[0]), dst, src); 291 /* dst over */ 292 ureg_SUB(ureg, temp[1], one, ureg_scalar(dst, TGSI_SWIZZLE_W)); 293 ureg_MAD(ureg, temp[1], 
ureg_src(temp[1]), src, dst); 294 /* take min/max for colors */ 295 if (mode == VG_BLEND_DARKEN) { 296 ureg_MIN(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ), 297 ureg_src(temp[0]), ureg_src(temp[1])); 298 } 299 else { 300 ureg_MAX(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ), 301 ureg_src(temp[0]), ureg_src(temp[1])); 302 } 303 break; 304 case VG_BLEND_ADDITIVE: 305 /* RGBA_out = RGBA_src + RGBA_dst */ 306 ureg_ADD(ureg, temp[0], src, dst); 307 ureg_MIN(ureg, out, ureg_src(temp[0]), one); 308 break; 309 default: 310 assert(0); 311 break; 312 } 313} 314 315static INLINE void 316blend_multiply( struct ureg_program *ureg, 317 struct ureg_dst *out, 318 struct ureg_src *in, 319 struct ureg_src *sampler, 320 struct ureg_dst *temp, 321 struct ureg_src *constant) 322{ 323 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 324 blend_generic(ureg, VG_BLEND_MULTIPLY, *out, 325 ureg_src(temp[0]), 326 ureg_src(temp[1]), 327 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 328 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 329 temp + 2); 330} 331 332static INLINE void 333blend_screen( struct ureg_program *ureg, 334 struct ureg_dst *out, 335 struct ureg_src *in, 336 struct ureg_src *sampler, 337 struct ureg_dst *temp, 338 struct ureg_src *constant) 339{ 340 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 341 blend_generic(ureg, VG_BLEND_SCREEN, *out, 342 ureg_src(temp[0]), 343 ureg_src(temp[1]), 344 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 345 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 346 temp + 2); 347} 348 349static INLINE void 350blend_darken( struct ureg_program *ureg, 351 struct ureg_dst *out, 352 struct ureg_src *in, 353 struct ureg_src *sampler, 354 struct ureg_dst *temp, 355 struct ureg_src *constant) 356{ 357 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 358 blend_generic(ureg, VG_BLEND_DARKEN, *out, 359 ureg_src(temp[0]), 360 ureg_src(temp[1]), 361 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 362 ureg_scalar(constant[3], 
TGSI_SWIZZLE_Y), 363 temp + 2); 364} 365 366static INLINE void 367blend_lighten( struct ureg_program *ureg, 368 struct ureg_dst *out, 369 struct ureg_src *in, 370 struct ureg_src *sampler, 371 struct ureg_dst *temp, 372 struct ureg_src *constant) 373{ 374 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); 375 blend_generic(ureg, VG_BLEND_LIGHTEN, *out, 376 ureg_src(temp[0]), 377 ureg_src(temp[1]), 378 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 379 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 380 temp + 2); 381} 382 383static INLINE void 384mask( struct ureg_program *ureg, 385 struct ureg_dst *out, 386 struct ureg_src *in, 387 struct ureg_src *sampler, 388 struct ureg_dst *temp, 389 struct ureg_src *constant) 390{ 391 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]); 392 ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W), 393 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 394 ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); 395 ureg_MOV(ureg, *out, ureg_src(temp[0])); 396} 397 398static INLINE void 399premultiply( struct ureg_program *ureg, 400 struct ureg_dst *out, 401 struct ureg_src *in, 402 struct ureg_src *sampler, 403 struct ureg_dst *temp, 404 struct ureg_src *constant) 405{ 406 ureg_MUL(ureg, 407 ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), 408 ureg_src(temp[0]), 409 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); 410} 411 412static INLINE void 413unpremultiply( struct ureg_program *ureg, 414 struct ureg_dst *out, 415 struct ureg_src *in, 416 struct ureg_src *sampler, 417 struct ureg_dst *temp, 418 struct ureg_src *constant) 419{ 420 ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]); 421} 422 423 424static INLINE void 425color_bw( struct ureg_program *ureg, 426 struct ureg_dst *out, 427 struct ureg_src *in, 428 struct ureg_src *sampler, 429 struct ureg_dst *temp, 430 struct ureg_src *constant) 431{ 432 ureg_ADD(ureg, temp[1], 433 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 434 ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); 
435 ureg_RCP(ureg, temp[2], ureg_src(temp[1])); 436 ureg_ADD(ureg, temp[1], 437 ureg_scalar(constant[3], TGSI_SWIZZLE_Y), 438 ureg_src(temp[2])); 439 ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), 440 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X), 441 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y)); 442 ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), 443 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z), 444 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X)); 445 ureg_SGE(ureg, 446 ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), 447 ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X), 448 ureg_src(temp[1])); 449 ureg_SGE(ureg, 450 ureg_writemask(temp[0], TGSI_WRITEMASK_W), 451 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), 452 ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y)); 453 ureg_MOV(ureg, *out, ureg_src(temp[0])); 454} 455 456 457struct shader_asm_info { 458 VGint id; 459 ureg_func func; 460 461 VGboolean needs_position; 462 463 VGint start_const; 464 VGint num_consts; 465 466 VGint start_sampler; 467 VGint num_samplers; 468 469 VGint start_temp; 470 VGint num_temps; 471}; 472 473 474/* paint types */ 475static const struct shader_asm_info shaders_paint_asm[] = { 476 {VEGA_SOLID_FILL_SHADER, solid_fill, 477 VG_FALSE, 2, 1, 0, 0, 0, 0}, 478 {VEGA_LINEAR_GRADIENT_SHADER, linear_grad, 479 VG_TRUE, 2, 5, 0, 1, 0, 5}, 480 {VEGA_RADIAL_GRADIENT_SHADER, radial_grad, 481 VG_TRUE, 2, 5, 0, 1, 0, 5}, 482 {VEGA_PATTERN_SHADER, pattern, 483 VG_TRUE, 3, 4, 0, 1, 0, 5}, 484 {VEGA_PAINT_DEGENERATE_SHADER, paint_degenerate, 485 VG_FALSE, 3, 1, 0, 1, 0, 2} 486}; 487 488/* image draw modes */ 489static const struct shader_asm_info shaders_image_asm[] = { 490 {VEGA_IMAGE_NORMAL_SHADER, image_normal, 491 VG_TRUE, 0, 0, 3, 1, 0, 0}, 492 {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply, 493 VG_TRUE, 0, 0, 3, 1, 0, 2}, 494 {VEGA_IMAGE_STENCIL_SHADER, image_stencil, 495 VG_TRUE, 0, 0, 3, 1, 0, 2} 496}; 497 498static const struct shader_asm_info shaders_color_transform_asm[] = { 
499 {VEGA_COLOR_TRANSFORM_SHADER, color_transform, 500 VG_FALSE, 0, 4, 0, 0, 0, 2} 501}; 502 503/* extra blend modes */ 504static const struct shader_asm_info shaders_blend_asm[] = { 505 {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply, 506 VG_TRUE, 3, 1, 2, 1, 0, 4}, 507 {VEGA_BLEND_SCREEN_SHADER, blend_screen, 508 VG_TRUE, 3, 1, 2, 1, 0, 4}, 509 {VEGA_BLEND_DARKEN_SHADER, blend_darken, 510 VG_TRUE, 3, 1, 2, 1, 0, 4}, 511 {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten, 512 VG_TRUE, 3, 1, 2, 1, 0, 4}, 513}; 514 515static const struct shader_asm_info shaders_mask_asm[] = { 516 {VEGA_MASK_SHADER, mask, 517 VG_TRUE, 0, 0, 1, 1, 0, 2} 518}; 519 520/* premultiply */ 521static const struct shader_asm_info shaders_premultiply_asm[] = { 522 {VEGA_PREMULTIPLY_SHADER, premultiply, 523 VG_FALSE, 0, 0, 0, 0, 0, 1}, 524 {VEGA_UNPREMULTIPLY_SHADER, unpremultiply, 525 VG_FALSE, 0, 0, 0, 0, 0, 1}, 526}; 527 528/* color transform to black and white */ 529static const struct shader_asm_info shaders_bw_asm[] = { 530 {VEGA_BW_SHADER, color_bw, 531 VG_FALSE, 3, 1, 0, 0, 0, 3}, 532}; 533 534#endif 535