/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Author:  Nemanja Lukic (nlukic@mips.com)
 */

#ifndef PIXMAN_MIPS_DSPR2_ASM_H
#define PIXMAN_MIPS_DSPR2_ASM_H

/*
 * Symbolic names for the MIPS general purpose registers. These are
 * preprocessor defines, so this header must be run through cpp.
 */
#define zero $0
#define AT   $1
#define v0   $2
#define v1   $3
#define a0   $4
#define a1   $5
#define a2   $6
#define a3   $7
#define t0   $8
#define t1   $9
#define t2   $10
#define t3   $11
#define t4   $12
#define t5   $13
#define t6   $14
#define t7   $15
#define s0   $16
#define s1   $17
#define s2   $18
#define s3   $19
#define s4   $20
#define s5   $21
#define s6   $22
#define s7   $23
#define t8   $24
#define t9   $25
#define k0   $26
#define k1   $27
#define gp   $28
#define sp   $29
#define fp   $30
#define s8   $30
#define ra   $31

/*
 * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
 *
 * Emits the global, 4-byte aligned function label together with .ent/.frame
 * bookkeeping, then pushes the assembler option state and selects
 * arch=mips32r2, noreorder and noat. Must be closed with END(symbol), which
 * pops that option state again.
 */
#define LEAF_MIPS32R2(symbol)                           \
                .globl  symbol;                         \
                .align  2;                              \
                .type   symbol, @function;              \
                .ent    symbol, 0;                      \
symbol:         .frame  sp, 0, ra;                      \
                .set    push;                           \
                .set    arch=mips32r2;                  \
                .set    noreorder;                      \
                .set    noat;

/*
 * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
 *
 * Same as LEAF_MIPS32R2, and additionally enables the DSPr2 instructions.
 */
#define LEAF_MIPS_DSPR2(symbol)                         \
LEAF_MIPS32R2(symbol)                                   \
                .set    dspr2;

/*
 * END - mark end of function
 *
 * Restores the assembler options saved by LEAF_MIPS32R2/LEAF_MIPS_DSPR2 and
 * records the size of the function.
 */
#define END(function)                                   \
                .set    pop;                            \
                .end    function;                       \
                .size   function,.-function

/*
 * Checks if stack offset is big enough for storing/restoring regs_num
 * number of registers to/from stack. Since MIPS ABI allows usage of the first
 * 16 bytes of the stack frame (this space is reserved for input arguments of
 * the functions, already stored in a0-a3), stack size is optimized by
 * utilizing this space: stack offset must be greater than or equal to the
 * number of bytes needed for storing registers minus those 16 bytes
 * (regs_num * 4 - 16). Raises an assembly-time error otherwise.
 */
.macro CHECK_STACK_OFFSET regs_num, stack_offset
.if \stack_offset < \regs_num * 4 - 16
.error "Stack offset too small."
.endif
.endm

/*
 * Saves set of registers on stack. Maximum number of registers that
 * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is number of bytes that are subtracted from stack pointer (sp)
 * before registers are pushed in order to provide enough space on stack
 * (offset must be multiple of 4, and must be big enough, as described by
 * CHECK_STACK_OFFSET macro). This macro is intended to be used in
 * combination with RESTORE_REGS_FROM_STACK macro. Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 *
 * Register k (1-based) is stored at (k - 1) * 4 bytes above the new sp.
 * r1 is always stored; r2..r14 are stored only when supplied.
 */
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
                          r2  = 0, r3  = 0, r4  = 0, \
                          r5  = 0, r6  = 0, r7  = 0, \
                          r8  = 0, r9  = 0, r10 = 0, \
                          r11 = 0, r12 = 0, r13 = 0, \
                          r14 = 0
    /* Reject negative offsets and offsets that are not a multiple of 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
    .error "Stack offset must be pozitive and multiple of 4."
    .endif
    .if \stack_offset != 0
    addiu           sp, sp, -\stack_offset
    .endif
    /* The first four slots (16 bytes) need no size check. */
    sw              \r1, 0(sp)
    .if \r2 != 0
    sw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    sw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    sw              \r4, 12(sp)
    .endif
    /* From the fifth register on, the frame size has to be verified. */
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    sw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    sw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    sw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    sw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    sw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    sw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    sw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    sw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    sw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    sw              \r14, 52(sp)
    .endif
.endm

/*
 * Restores set of registers from stack. Maximum number of registers that
 * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is number of bytes that are added to stack pointer (sp)
 * after registers are restored (offset must be multiple of 4, and must
 * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
 * intended to be used in combination with SAVE_REGS_ON_STACK macro.
 * Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 *
 * Register k (1-based) is loaded from (k - 1) * 4 bytes above sp, i.e. the
 * register list must be given in the same order as for SAVE_REGS_ON_STACK.
 */
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
                               r2  = 0, r3  = 0, r4  = 0, \
                               r5  = 0, r6  = 0, r7  = 0, \
                               r8  = 0, r9  = 0, r10 = 0, \
                               r11 = 0, r12 = 0, r13 = 0, \
                               r14 = 0
    /* Reject negative offsets and offsets that are not a multiple of 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
    .error "Stack offset must be pozitive and multiple of 4."
    .endif
    lw              \r1, 0(sp)
    .if \r2 != 0
    lw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    lw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    lw              \r4, 12(sp)
    .endif
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    lw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    lw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    lw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    lw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    lw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    lw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    lw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    lw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    lw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    lw              \r14, 52(sp)
    .endif
    /* Release the frame only after every register has been reloaded. */
    .if \stack_offset != 0
    addiu           sp, sp, \stack_offset
    .endif
.endm

/*
 * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
 * returned in (out_8888) register. Requires two temporary registers
 * (scratch1 and scratch2).
 *
 * Alpha is forced to 0xff. Each colour component is widened to 8 bits by
 * repeating its most significant bits in the vacated low bits.
 * out_8888 is written before in_565 is last read, so they must be
 * different registers.
 */
.macro CONVERT_1x0565_TO_1x8888 in_565,   \
                                out_8888, \
                                scratch1, scratch2
    lui     \out_8888, 0xff00                   /* alpha = 0xff */

    /* blue: 5 bits moved to bits 7..3, top 3 bits repeated in bits 2..0 */
    sll     \scratch1, \in_565,   0x3
    andi    \scratch2, \scratch1, 0xff
    ext     \scratch1, \in_565,   0x2, 0x3
    or      \scratch1, \scratch2, \scratch1
    or      \out_8888, \out_8888, \scratch1

    /* green: 6 bits moved to bits 15..10, top 2 bits repeated in bits 9..8 */
    sll     \scratch1, \in_565,   0x5
    andi    \scratch1, \scratch1, 0xfc00
    srl     \scratch2, \in_565,   0x1
    andi    \scratch2, \scratch2, 0x300
    or      \scratch2, \scratch1, \scratch2
    or      \out_8888, \out_8888, \scratch2

    /* red: built in bits 15..8, then shifted up into bits 23..16 */
    andi    \scratch1, \in_565,   0xf800
    srl     \scratch2, \scratch1, 0x5
    andi    \scratch2, \scratch2, 0xff00
    or      \scratch1, \scratch1, \scratch2
    sll     \scratch1, \scratch1, 0x8
    or      \out_8888, \out_8888, \scratch1
.endm

/*
 * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
 * returned in (out1_8888 and out2_8888) registers. Requires four scratch
 * registers (scratch1 ... scratch4). It also requires maskG and maskB for
 * color component extractions. These masks must have following values:
 *   li       maskG, 0x07e007e0
 *   li       maskB, 0x001F001F
 *
 * in2_565 is OR-ed unmasked into the low halfword of the packed pair, so its
 * upper 16 bits must be zero.
 */
.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565,     \
                                out1_8888, out2_8888, \
                                maskG, maskB,         \
                                scratch1, scratch2, scratch3, scratch4
    /* Pack both pixels: in1 in the upper halfword, in2 in the lower one. */
    sll               \scratch1,  \in1_565,   16
    or                \scratch1,  \scratch1,  \in2_565
    /* Alpha 0xff for both pixels (upper byte of each halfword). */
    lui               \out2_8888, 0xff00
    ori               \out2_8888, \out2_8888, 0xff00
    shrl.ph           \scratch2,  \scratch1,  11
    and               \scratch3,  \scratch1,  \maskG
    shra.ph           \scratch4,  \scratch2,  2
    shll.ph           \scratch2,  \scratch2,  3
    shll.ph           \scratch3,  \scratch3,  5
    or                \scratch2,  \scratch2,  \scratch4
    shrl.qb           \scratch4,  \scratch3,  6
    or                \out2_8888, \out2_8888, \scratch2   /* A | R per pixel */
    or                \scratch3,  \scratch3,  \scratch4
    and               \scratch1,  \scratch1,  \maskB
    shll.ph           \scratch2,  \scratch1,  3
    shra.ph           \scratch4,  \scratch1,  2
    or                \scratch2,  \scratch2,  \scratch4
    or                \scratch3,  \scratch2,  \scratch3   /* G | B per pixel */
    /* Interleave the A|R and G|B halfwords back into two full pixels. */
    precrq.ph.w       \out1_8888, \out2_8888, \scratch3
    precr_sra.ph.w    \out2_8888, \scratch3,  0
.endm

/*
 * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
 * returned in (out_565) register. Requires two temporary registers
 * (scratch1 and scratch2).
 *
 * out_565 is written before in_8888 is last read, so they must be
 * different registers.
 */
.macro CONVERT_1x8888_TO_1x0565 in_8888, \
                                out_565, \
                                scratch1, scratch2
    ext     \out_565,  \in_8888,  0x3, 0x5      /* blue  -> bits  4..0  */
    srl     \scratch1, \in_8888,  0x5
    andi    \scratch1, \scratch1, 0x07e0        /* green -> bits 10..5  */
    srl     \scratch2, \in_8888,  0x8
    andi    \scratch2, \scratch2, 0xf800        /* red   -> bits 15..11 */
    or      \out_565,  \out_565,  \scratch1
    or      \out_565,  \out_565,  \scratch2
.endm

/*
 * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
 * pixels returned in (out1_565 and out2_565) registers. It also requires
 * maskR, maskG and maskB for color component extractions. These masks must
 * have following values:
 *   li       maskR, 0xf800f800
 *   li       maskG, 0x07e007e0
 *   li       maskB, 0x001F001F
 * Value of input register in2_8888 is lost.
 *
 * Only scratch1 is used as a temporary; scratch2 is accepted but never
 * referenced by this macro.
 * out1_565 is not masked afterwards, so its upper halfword still carries the
 * second pixel; out2_565 is that upper halfword shifted down.
 */
.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888,  \
                                out1_565, out2_565,  \
                                maskR, maskG, maskB, \
                                scratch1, scratch2
    precr.qb.ph    \scratch1, \in2_8888, \in1_8888
    precrq.qb.ph   \in2_8888, \in2_8888, \in1_8888
    and            \out1_565, \scratch1, \maskR
    shrl.ph        \scratch1, \scratch1, 3
    shll.ph        \in2_8888, \in2_8888, 3
    and            \scratch1, \scratch1, \maskB
    or             \out1_565, \out1_565, \scratch1
    and            \in2_8888, \in2_8888, \maskG
    or             \out1_565, \out1_565, \in2_8888
    srl            \out2_565, \out1_565, 16
.endm

/*
 * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
 * for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Result is returned in d_8888. m_8 is overwritten (its low halfword is
 * replicated into both halfwords); scratch1 ... scratch3 are clobbered.
 * d_8888 is first written after s_8888 and m_8 have been read for the last
 * time.
 */
.macro MIPS_UN8x4_MUL_UN8 s_8888,  \
                          m_8,     \
                          d_8888,  \
                          maskLSR, \
                          scratch1, scratch2, scratch3
    replv.ph          \m_8,      \m_8                 /*   0 | M | 0 | M */
    muleu_s.ph.qbl    \scratch1, \s_8888,   \m_8      /*    A*M  |  R*M  */
    muleu_s.ph.qbr    \scratch2, \s_8888,   \m_8      /*    G*M  |  B*M  */
    shra_r.ph         \scratch3, \scratch1, 8
    shra_r.ph         \d_8888,   \scratch2, 8
    and               \scratch3, \scratch3, \maskLSR  /* 0 |A*M| 0 |R*M */
    and               \d_8888,   \d_8888,   \maskLSR  /* 0 |G*M| 0 |B*M */
    /* Add the rounded high byte of each product back onto the product. */
    addq.ph           \scratch1, \scratch1, \scratch3
    addq.ph           \scratch2, \scratch2, \d_8888
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    /* Pack the low byte of each of the four halfwords into one pixel. */
    precr.qb.ph       \d_8888,   \scratch1, \scratch2
.endm

/*
 * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Results are returned in d1_8888 and d2_8888. m1_8 and m2_8 are overwritten
 * (low halfword replicated); scratch1 ... scratch6 are clobbered.
 * d1_8888/d2_8888 are first written after all sources and masks have been
 * read for the last time.
 */
.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
                              s2_8888, \
                              m1_8,    \
                              m2_8,    \
                              d1_8888, \
                              d2_8888, \
                              maskLSR, \
                              scratch1, scratch2, scratch3, \
                              scratch4, scratch5, scratch6
    replv.ph          \m1_8,     \m1_8                /*  0 | M1 | 0 | M1 */
    replv.ph          \m2_8,     \m2_8                /*  0 | M2 | 0 | M2 */
    muleu_s.ph.qbl    \scratch1, \s1_8888,  \m1_8     /*  A1*M1  |  R1*M1 */
    muleu_s.ph.qbr    \scratch2, \s1_8888,  \m1_8     /*  G1*M1  |  B1*M1 */
    muleu_s.ph.qbl    \scratch3, \s2_8888,  \m2_8     /*  A2*M2  |  R2*M2 */
    muleu_s.ph.qbr    \scratch4, \s2_8888,  \m2_8     /*  G2*M2  |  B2*M2 */
    shra_r.ph         \scratch5, \scratch1, 8
    shra_r.ph         \d1_8888,  \scratch2, 8
    shra_r.ph         \scratch6, \scratch3, 8
    shra_r.ph         \d2_8888,  \scratch4, 8
    and               \scratch5, \scratch5, \maskLSR  /* 0 |A1*M1| 0 |R1*M1 */
    and               \d1_8888,  \d1_8888,  \maskLSR  /* 0 |G1*M1| 0 |B1*M1 */
    and               \scratch6, \scratch6, \maskLSR  /* 0 |A2*M2| 0 |R2*M2 */
    and               \d2_8888,  \d2_8888,  \maskLSR  /* 0 |G2*M2| 0 |B2*M2 */
    /* Add the rounded high byte of each product back onto the product. */
    addq.ph           \scratch1, \scratch1, \scratch5
    addq.ph           \scratch2, \scratch2, \d1_8888
    addq.ph           \scratch3, \scratch3, \scratch6
    addq.ph           \scratch4, \scratch4, \d2_8888
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    shra_r.ph         \scratch3, \scratch3, 8
    shra_r.ph         \scratch4, \scratch4, 8
    precr.qb.ph       \d1_8888,  \scratch1, \scratch2
    precr.qb.ph       \d2_8888,  \scratch3, \scratch4
.endm

/*
 * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Each component of s_8888 is multiplied with the matching component of
 * m_8888 (in the comments below "A*A" stands for s.A * m.A, and so on).
 * Result is returned in d_8888; scratch1 ... scratch4 are clobbered.
 */
.macro MIPS_UN8x4_MUL_UN8x4 s_8888,  \
                            m_8888,  \
                            d_8888,  \
                            maskLSR, \
                            scratch1, scratch2, scratch3, scratch4
    preceu.ph.qbl     \scratch1, \m_8888                /*   0 | A | 0 | R */
    preceu.ph.qbr     \scratch2, \m_8888                /*   0 | G | 0 | B */
    muleu_s.ph.qbl    \scratch3, \s_8888,   \scratch1   /*    A*A  |  R*R  */
    muleu_s.ph.qbr    \scratch4, \s_8888,   \scratch2   /*    G*G  |  B*B  */
    shra_r.ph         \scratch1, \scratch3, 8
    shra_r.ph         \scratch2, \scratch4, 8
    and               \scratch1, \scratch1, \maskLSR    /* 0 |A*A| 0 |R*R */
    and               \scratch2, \scratch2, \maskLSR    /* 0 |G*G| 0 |B*B */
    /* Add the rounded high byte of each product back onto the product. */
    addq.ph           \scratch1, \scratch1, \scratch3
    addq.ph           \scratch2, \scratch2, \scratch4
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    precr.qb.ph       \d_8888,   \scratch1, \scratch2
.endm

/*
 * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
 * maskLSR needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Results are returned in d1_8888 and d2_8888; scratch1 ... scratch6 are
 * clobbered. While the second pixel is being processed, d1_8888 temporarily
 * holds its A|R products and d2_8888 its G|B products.
 */
.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \
                                s2_8888, \
                                m1_8888, \
                                m2_8888, \
                                d1_8888, \
                                d2_8888, \
                                maskLSR, \
                                scratch1, scratch2, scratch3, \
                                scratch4, scratch5, scratch6
    preceu.ph.qbl     \scratch1, \m1_8888               /*   0 | A | 0 | R */
    preceu.ph.qbr     \scratch2, \m1_8888               /*   0 | G | 0 | B */
    preceu.ph.qbl     \scratch3, \m2_8888               /*   0 | A | 0 | R */
    preceu.ph.qbr     \scratch4, \m2_8888               /*   0 | G | 0 | B */
    muleu_s.ph.qbl    \scratch5, \s1_8888,  \scratch1   /*    A*A  |  R*R  */
    muleu_s.ph.qbr    \scratch6, \s1_8888,  \scratch2   /*    G*G  |  B*B  */
    muleu_s.ph.qbl    \scratch1, \s2_8888,  \scratch3   /*    A*A  |  R*R  */
    muleu_s.ph.qbr    \scratch2, \s2_8888,  \scratch4   /*    G*G  |  B*B  */
    shra_r.ph         \scratch3, \scratch5, 8
    shra_r.ph         \scratch4, \scratch6, 8
    shra_r.ph         \d1_8888,  \scratch1, 8
    shra_r.ph         \d2_8888,  \scratch2, 8
    and               \scratch3, \scratch3, \maskLSR    /* 0 |A*A| 0 |R*R */
    and               \scratch4, \scratch4, \maskLSR    /* 0 |G*G| 0 |B*B */
    and               \d1_8888,  \d1_8888,  \maskLSR    /* 0 |A*A| 0 |R*R */
    and               \d2_8888,  \d2_8888,  \maskLSR    /* 0 |G*G| 0 |B*B */
    /* Add the rounded high byte of each product back onto the product. */
    addq.ph           \scratch3, \scratch3, \scratch5
    addq.ph           \scratch4, \scratch4, \scratch6
    addq.ph           \d1_8888,  \d1_8888,  \scratch1
    addq.ph           \d2_8888,  \d2_8888,  \scratch2
    shra_r.ph         \scratch3, \scratch3, 8
    shra_r.ph         \scratch4, \scratch4, 8
    shra_r.ph         \scratch5, \d1_8888,  8
    shra_r.ph         \scratch6, \d2_8888,  8
    precr.qb.ph       \d1_8888,  \scratch3, \scratch4
    precr.qb.ph       \d2_8888,  \scratch5, \scratch6
.endm

/*
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
 * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Result is returned in out_8888. m_8 and d_8888 are overwritten, and
 * scratch1 ... scratch4 are clobbered.
 */
.macro OVER_8888_8_8888 s_8888,   \
                        m_8,      \
                        d_8888,   \
                        out_8888, \
                        maskLSR,  \
                        scratch1, scratch2, scratch3, scratch4
    /* scratch1 = source * mask */
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8,      \
                       \scratch1, \maskLSR,  \
                       \scratch2, \scratch3, \scratch4

    /* scratch2 = inverted alpha of the masked source */
    not                \scratch2, \scratch1
    srl                \scratch2, \scratch2, 24

    /* d_8888 = destination * inverted alpha (out_8888 serves as scratch) */
    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
                       \d_8888,   \maskLSR,  \
                       \scratch3, \scratch4, \out_8888

    /* per-byte saturating add of both terms */
    addu_s.qb          \out_8888, \d_8888,   \scratch1
.endm

/*
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
 * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
 * have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Results are returned in out1_8888 and out2_8888. m1_8, m2_8, d1_8888 and
 * d2_8888 are overwritten, and scratch1 ... scratch6 are clobbered.
 */
.macro OVER_2x8888_2x8_2x8888 s1_8888,   \
                              s2_8888,   \
                              m1_8,      \
                              m2_8,      \
                              d1_8888,   \
                              d2_8888,   \
                              out1_8888, \
                              out2_8888, \
                              maskLSR,   \
                              scratch1, scratch2, scratch3, \
                              scratch4, scratch5, scratch6
    /* scratch1/scratch2 = source * mask (out registers serve as scratch) */
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888,  \
                           \m1_8,      \m2_8,     \
                           \scratch1,  \scratch2, \
                           \maskLSR,              \
                           \scratch3,  \scratch4, \out1_8888, \
                           \out2_8888, \scratch5, \scratch6

    /* scratch3/scratch4 = inverted alpha of the masked sources */
    not                    \scratch3,  \scratch1
    srl                    \scratch3,  \scratch3, 24
    not                    \scratch4,  \scratch2
    srl                    \scratch4,  \scratch4, 24

    /* d1_8888/d2_8888 = destination * inverted alpha */
    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888,  \
                           \scratch3,  \scratch4, \
                           \d1_8888,   \d2_8888,  \
                           \maskLSR,              \
                           \scratch5,  \scratch6, \out1_8888, \
                           \out2_8888, \scratch3, \scratch4

    /* per-byte saturating add of both terms */
    addu_s.qb              \out1_8888, \d1_8888,  \scratch1
    addu_s.qb              \out2_8888, \d2_8888,  \scratch2
.endm

/*
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
 * destination pixel (d_8888). It also requires maskLSR needed for rounding
 * process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Result is returned in out_8888; scratch1 ... scratch4 are clobbered.
 */
.macro OVER_8888_8888 s_8888,   \
                      d_8888,   \
                      out_8888, \
                      maskLSR,  \
                      scratch1, scratch2, scratch3, scratch4
    /* scratch1 = inverted source alpha */
    not                \scratch1, \s_8888
    srl                \scratch1, \scratch1, 24

    /* out_8888 = destination * inverted source alpha */
    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
                       \out_8888, \maskLSR,  \
                       \scratch2, \scratch3, \scratch4

    /* per-byte saturating add of the source */
    addu_s.qb          \out_8888, \out_8888, \s_8888
.endm

/*
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Results are returned in out1_8888 and out2_8888. d1_8888 and d2_8888 are
 * handed to the multiply macro as its last two scratch registers, so their
 * values are lost; scratch1 ... scratch6 are clobbered.
 */
.macro OVER_2x8888_2x8888 s1_8888,   \
                          s2_8888,   \
                          d1_8888,   \
                          d2_8888,   \
                          out1_8888, \
                          out2_8888, \
                          maskLSR,   \
                          scratch1, scratch2, scratch3, \
                          scratch4, scratch5, scratch6
    /* scratch1/scratch2 = inverted source alphas */
    not                    \scratch1,  \s1_8888
    srl                    \scratch1,  \scratch1,  24
    not                    \scratch2,  \s2_8888
    srl                    \scratch2,  \scratch2,  24
    /* out = destination * inverted source alpha */
    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888,   \
                           \scratch1,  \scratch2,  \
                           \out1_8888, \out2_8888, \
                           \maskLSR,               \
                           \scratch3,  \scratch4, \scratch5, \
                           \scratch6,  \d1_8888,  \d2_8888

    /* per-byte saturating add of the sources */
    addu_s.qb              \out1_8888, \out1_8888, \s1_8888
    addu_s.qb              \out2_8888, \out2_8888, \s2_8888
.endm

/*
 * Multiply single a8r8g8b8 pixel (s_8888) with a8 mask (m_8) and add single
 * a8r8g8b8 pixel (d_8888) to the product, using per-byte saturating addition.
 * It requires maskLSR needed for rounding process. maskLSR must have
 * following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Result is returned in out_8888. m_8 is overwritten and
 * scratch1 ... scratch3 are clobbered. out_8888 is written before d_8888 is
 * read, so they must be different registers.
 */
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                    m_8,      \
                                    d_8888,   \
                                    out_8888, \
                                    maskLSR,  \
                                    scratch1, scratch2, scratch3
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8,      \
                       \out_8888, \maskLSR,  \
                       \scratch1, \scratch2, \scratch3

    addu_s.qb          \out_8888, \out_8888, \d_8888
.endm

/*
 * Multiply two a8r8g8b8 pixels (s1_8888 and s2_8888) with two a8 masks
 * (m1_8 and m2_8) and add two a8r8g8b8 pixels (d1_8888 and d2_8888) to the
 * products, using per-byte saturating addition. It requires maskLSR needed
 * for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Results are returned in out1_8888 and out2_8888. m1_8 and m2_8 are
 * overwritten and scratch1 ... scratch6 are clobbered. The out registers are
 * written before d1_8888/d2_8888 are read, so they must not alias them.
 */
.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,   \
                                          s2_8888,   \
                                          m1_8,      \
                                          m2_8,      \
                                          d1_8888,   \
                                          d2_8888,   \
                                          out1_8888, \
                                          out2_8888, \
                                          maskLSR,   \
                                          scratch1, scratch2, scratch3, \
                                          scratch4, scratch5, scratch6
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888,   \
                           \m1_8,      \m2_8,      \
                           \out1_8888, \out2_8888, \
                           \maskLSR,               \
                           \scratch1,  \scratch2,  \scratch3, \
                           \scratch4,  \scratch5,  \scratch6

    addu_s.qb              \out1_8888, \out1_8888, \d1_8888
    addu_s.qb              \out2_8888, \out2_8888, \d2_8888
.endm

/*
 * Weighted sum of four a8r8g8b8 pixels (tl, tr, bl, br) with weights
 * (wt1, wt2, wb1, wb2) applied to tl, tr, bl and br respectively.
 * Each 8-bit channel is accumulated in its own DSP accumulator
 * ($ac0 ... $ac3, from the lowest byte to the highest byte), and bits 23..16
 * of each accumulator's low word become the corresponding byte of the result.
 * NOTE(review): this implies the four weights are expected to sum to
 * 0x10000 - confirm against the callers.
 *
 * Result is returned in tl (its input value is lost). scratch1, scratch2,
 * alpha, red, green and blue are used as temporaries and clobbered, as are
 * the accumulators $ac0 ... $ac3. While the products are being formed, alpha
 * and red merely hold the channel bytes of bl and br.
 */
.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,          \
                                         scratch1, scratch2,      \
                                         alpha, red, green, blue, \
                                         wt1, wt2, wb1, wb2
    /* byte 0 of every pixel -> $ac0 */
    andi            \scratch1, \tl,  0xff
    andi            \scratch2, \tr,  0xff
    andi            \alpha,    \bl,  0xff
    andi            \red,      \br,  0xff

    multu           $ac0,      \wt1, \scratch1
    maddu           $ac0,      \wt2, \scratch2
    maddu           $ac0,      \wb1, \alpha
    maddu           $ac0,      \wb2, \red

    /* byte 1 of every pixel -> $ac1 */
    ext             \scratch1, \tl,  8, 8
    ext             \scratch2, \tr,  8, 8
    ext             \alpha,    \bl,  8, 8
    ext             \red,      \br,  8, 8

    multu           $ac1,      \wt1, \scratch1
    maddu           $ac1,      \wt2, \scratch2
    maddu           $ac1,      \wb1, \alpha
    maddu           $ac1,      \wb2, \red

    /* byte 2 of every pixel -> $ac2 */
    ext             \scratch1, \tl,  16, 8
    ext             \scratch2, \tr,  16, 8
    ext             \alpha,    \bl,  16, 8
    ext             \red,      \br,  16, 8

    mflo            \blue,     $ac0

    multu           $ac2,      \wt1, \scratch1
    maddu           $ac2,      \wt2, \scratch2
    maddu           $ac2,      \wb1, \alpha
    maddu           $ac2,      \wb2, \red

    /* byte 3 of every pixel -> $ac3 */
    ext             \scratch1, \tl,  24, 8
    ext             \scratch2, \tr,  24, 8
    ext             \alpha,    \bl,  24, 8
    ext             \red,      \br,  24, 8

    mflo            \green,    $ac1

    multu           $ac3,      \wt1, \scratch1
    maddu           $ac3,      \wt2, \scratch2
    maddu           $ac3,      \wb1, \alpha
    maddu           $ac3,      \wb2, \red

    mflo            \red,      $ac2
    mflo            \alpha,    $ac3

    /* Gather bits 23..16 of the four sums into one a8r8g8b8 pixel. */
    precr.qb.ph     \alpha,    \alpha, \red
    precr.qb.ph     \scratch1, \green, \blue
    precrq.qb.ph    \tl,       \alpha, \scratch1
.endm

#endif /* PIXMAN_MIPS_DSPR2_ASM_H */