1/*
2 * Copyright (c) 2012
3 *      MIPS Technologies, Inc., California.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 *    contributors may be used to endorse or promote products derived from
15 *    this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * Author:  Nemanja Lukic (nlukic@mips.com)
30 */
31
32#include "pixman-private.h"
33#include "pixman-mips-dspr2-asm.h"
34
LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
/*
 * Fill a buffer with a 16-bit value.
 *
 * a0 - destination pointer
 * a1 - number of bytes to fill
 * a2 - 16-bit fill value
 *
 * t1 is the only scratch register; a0, a1 and a2 are modified.
 */

    beqz     a1, 5f          /* empty buffer: return immediately */
     andi    t1, a0, 2       /* (delay slot) bit 1 set => not on a 4-byte boundary */
    beqz     t1, 1f
     nop
    sh       a2, 0(a0)       /* one leading halfword, so that a0 + 2 is word aligned */
    addiu    a1, a1, -2      /* two bytes fewer left to write */
    addiu    a0, a0, 2
1:
    replv.ph a2, a2          /* copy the 16-bit value into both halves of a2 */
    srl      t1, a1, 5       /* t1 = number of complete 32-byte blocks */
    beqz     t1, 4f          /* fewer than 32 bytes: halfword loop only */
     nop
2:
    addiu    t1, t1, -1
    beqz     t1, 3f          /* the final block is written without the prefetch */
     addiu   a1, a1, -32     /* (delay slot) executed on both paths */
    pref     30, 32(a0)      /* hint 30 (PrepareForStore) on the block after this one */
    sw       a2, 0(a0)
    sw       a2, 4(a0)
    sw       a2, 8(a0)
    sw       a2, 12(a0)
    sw       a2, 16(a0)
    sw       a2, 20(a0)
    sw       a2, 24(a0)
    sw       a2, 28(a0)
    b        2b
     addiu   a0, a0, 32
3:
    /* last full 32-byte block */
    sw       a2, 0(a0)
    sw       a2, 4(a0)
    sw       a2, 8(a0)
    sw       a2, 12(a0)
    sw       a2, 16(a0)
    sw       a2, 20(a0)
    sw       a2, 24(a0)
    sw       a2, 28(a0)
    addiu    a0, a0, 32
4:
    /* whatever is left goes out one halfword at a time */
    blez     a1, 5f
     addiu   a1, a1, -2
    sh       a2, 0(a0)
    b        4b
     addiu   a0, a0, 2
5:
    jr       ra
     nop

END(pixman_fill_buff16_mips)
90
LEAF_MIPS32R2(pixman_fill_buff32_mips)
/*
 * Fill a buffer with a 32-bit value.
 *
 * a0 - destination pointer
 * a1 - number of bytes to fill
 * a2 - 32-bit fill value
 *
 * t1 is the only scratch register; a0 and a1 are modified.
 */

    beqz     a1, 4f          /* empty buffer: return immediately */
     nop
    srl      t1, a1, 5       /* t1 = number of complete 32-byte blocks */
    beqz     t1, 3f          /* fewer than 32 bytes: word loop only */
     nop
1:
    addiu    t1, t1, -1
    beqz     t1, 2f          /* the final block is written without the prefetch */
     addiu   a1, a1, -32     /* (delay slot) executed on both paths */
    pref     30, 32(a0)      /* hint 30 (PrepareForStore) on the block after this one */
    sw       a2, 0(a0)
    sw       a2, 4(a0)
    sw       a2, 8(a0)
    sw       a2, 12(a0)
    sw       a2, 16(a0)
    sw       a2, 20(a0)
    sw       a2, 24(a0)
    sw       a2, 28(a0)
    b        1b
     addiu   a0, a0, 32
2:
    /* last full 32-byte block */
    sw       a2, 0(a0)
    sw       a2, 4(a0)
    sw       a2, 8(a0)
    sw       a2, 12(a0)
    sw       a2, 16(a0)
    sw       a2, 20(a0)
    sw       a2, 24(a0)
    sw       a2, 28(a0)
    addiu    a0, a0, 32
3:
    /* whatever is left goes out one word at a time */
    blez     a1, 4f
     addiu   a1, a1, -4
    sw       a2, 0(a0)
    b        3b
     addiu   a0, a0, 4
4:
    jr       ra
     nop

END(pixman_fill_buff32_mips)
139
LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
/*
 * Convert a row of 32-bit pixels to 16-bit pixels.
 *
 * a0 - dst (r5g6b5)
 * a1 - src (a8r8g8b8)
 * a2 - w   (number of pixels)
 *
 * Pixels are converted two at a time; an odd trailing pixel (or a row
 * of exactly one pixel) goes through the single-pixel step at label 2.
 */

    beqz     a2, 3f          /* w == 0: nothing to convert */
     nop
    addiu    t1, a2, -1
    beqz     t1, 2f          /* w == 1: single-pixel step only */
     nop
    /* 5-, 6- and 5-bit field masks, repeated in both halfwords, for the
     * two-pixel conversion macro */
    li       t6, 0x001f001f
    li       t5, 0x07e007e0
    li       t4, 0xf800f800
1:
    lw       t0, 0(a1)       /* two source pixels */
    lw       t1, 4(a1)
    addiu    a2, a2, -2
    addiu    a1, a1, 8

    CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8

    sh       t2, 0(a0)       /* two converted pixels */
    sh       t3, 2(a0)

    addiu    t2, a2, -1
    bgtz     t2, 1b          /* repeat while at least two pixels remain */
     addiu   a0, a0, 4
2:
    beqz     a2, 3f          /* even width: done */
     nop
    lw       t0, 0(a1)

    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    sh       t1, 0(a0)
3:
    jr       ra
     nop

END(pixman_composite_src_8888_0565_asm_mips)
182
LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
/*
 * Convert a row of 16-bit pixels to 32-bit pixels.
 *
 * a0 - dst (a8r8g8b8)
 * a1 - src (r5g6b5)
 * a2 - w   (number of pixels)
 *
 * Pixels are converted two at a time; an odd trailing pixel (or a row
 * of exactly one pixel) goes through the single-pixel step at label 2.
 */

    beqz     a2, 3f          /* w == 0: nothing to convert */
     nop
    addiu    t1, a2, -1
    beqz     t1, 2f          /* w == 1: single-pixel step only */
     nop
    /* 5- and 6-bit field masks, repeated in both halfwords, for the
     * two-pixel conversion macro */
    li       t5, 0x001f001f
    li       t4, 0x07e007e0
1:
    lhu      t0, 0(a1)       /* two source pixels, zero extended */
    lhu      t1, 2(a1)
    addiu    a2, a2, -2
    addiu    a1, a1, 4

    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9

    sw       t2, 0(a0)       /* two converted pixels */
    sw       t3, 4(a0)

    addiu    t2, a2, -1
    bgtz     t2, 1b          /* repeat while at least two pixels remain */
     addiu   a0, a0, 8
2:
    beqz     a2, 3f          /* even width: done */
     nop
    lhu      t0, 0(a1)

    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3

    sw       t1, 0(a0)
3:
    jr       ra
     nop

END(pixman_composite_src_0565_8888_asm_mips)
224
LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
/*
 * Copy a row of 32-bit pixels, forcing the top byte of each to 0xff.
 *
 * a0 - dst (a8r8g8b8)
 * a1 - src (x8r8g8b8)
 * a2 - w   (number of pixels)
 *
 * Eight pixels (32 bytes) are moved per pass of the main loop; the last
 * group of eight is handled separately, without prefetching, and the
 * remaining pixels are copied one at a time.
 */

    beqz     a2, 3f          /* w == 0: nothing to do */
     nop
    srl      t8, a2, 3       /* t8 = number of complete groups of 8 pixels */
    li       t9, 0xff000000  /* t9 = value OR-ed into every pixel */
    beqz     t8, 2f          /* fewer than 8 pixels: single-pixel loop only */
     nop
0:
    addiu    t8, t8, -1
    beqz     t8, 1f          /* the last group skips both prefetches */
     addiu   a2, a2, -8      /* (delay slot) executed on both paths */
    pref     0, 32(a1)       /* hint 0 (load) on the next source group */
    lw       t0, 0(a1)
    lw       t1, 4(a1)
    lw       t2, 8(a1)
    lw       t3, 12(a1)
    lw       t4, 16(a1)
    lw       t5, 20(a1)
    lw       t6, 24(a1)
    lw       t7, 28(a1)
    addiu    a1, a1, 32
    or       t0, t0, t9
    or       t1, t1, t9
    or       t2, t2, t9
    or       t3, t3, t9
    or       t4, t4, t9
    or       t5, t5, t9
    or       t6, t6, t9
    or       t7, t7, t9
    pref     30, 32(a0)      /* hint 30 (PrepareForStore) on the next dst group */
    sw       t0, 0(a0)
    sw       t1, 4(a0)
    sw       t2, 8(a0)
    sw       t3, 12(a0)
    sw       t4, 16(a0)
    sw       t5, 20(a0)
    sw       t6, 24(a0)
    sw       t7, 28(a0)
    b        0b
     addiu   a0, a0, 32
1:
    /* last complete group of 8 pixels */
    lw       t0, 0(a1)
    lw       t1, 4(a1)
    lw       t2, 8(a1)
    lw       t3, 12(a1)
    lw       t4, 16(a1)
    lw       t5, 20(a1)
    lw       t6, 24(a1)
    lw       t7, 28(a1)
    addiu    a1, a1, 32
    or       t0, t0, t9
    or       t1, t1, t9
    or       t2, t2, t9
    or       t3, t3, t9
    or       t4, t4, t9
    or       t5, t5, t9
    or       t6, t6, t9
    or       t7, t7, t9
    sw       t0, 0(a0)
    sw       t1, 4(a0)
    sw       t2, 8(a0)
    sw       t3, 12(a0)
    sw       t4, 16(a0)
    sw       t5, 20(a0)
    sw       t6, 24(a0)
    sw       t7, 28(a0)
    beqz     a2, 3f          /* width was a multiple of 8: done */
     addiu   a0, a0, 32
2:
    /* one pixel per pass for the remainder */
    lw       t0, 0(a1)
    addiu    a2, a2, -1
    addiu    a1, a1, 4
    or       t0, t0, t9
    sw       t0, 0(a0)
    bnez     a2, 2b
     addiu   a0, a0, 4
3:
    jr       ra
     nop

END(pixman_composite_src_x888_8888_asm_mips)
312
313#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
/*
 * Expand a row of packed 3-byte pixels into 32-bit pixels whose top
 * byte is forced to 0xff.
 *
 * a0 - dst (a8r8g8b8)
 * a1 - src (b8g8r8)
 * a2 - w   (number of pixels)
 *
 * Rows of four or more pixels are converted four at a time (12 source
 * bytes in, 16 destination bytes out).  The low two bits of the source
 * address select one of four loops (labels 0, 11, 21 and 31) so that
 * every word load is made from a 4-byte aligned address.  In the last
 * three loops the bytes of the following pixel that have already been
 * loaded are carried in t7 into the next pass.  Leftover pixels are
 * converted one at a time with byte loads (label 5).
 *
 * The diagrams in the comments list a register's bytes from most to
 * least significant; Rn/Gn/Bn are the first, second and third byte of
 * source pixel n, xx is a don't-care byte.
 */

    beqz              a2, 6f               /* w == 0: nothing to do */
     nop

    lui               t8, 0xff00           /* t8 = 0xff000000, OR-ed into every pixel */
    srl               t9, a2, 2            /* t9 = number of groups of 4 pixels */
    beqz              t9, 4f               /* fewer than 4 pixels: byte loop only */
     nop

    andi              t3, a1, 0x3          /* t3 = source address modulo 4 */
    li                t0, 0x1
    li                t1, 0x2
    li                t2, 0x3
    beq               t3, t0, 1f
     nop
    beq               t3, t1, 2f
     nop
    beq               t3, t2, 3f
     nop

0:                                         /* source address modulo 4 == 0 */
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 0(a1)            /* t0 = [R2 B1 G1 R1] */
    lw                t1, 4(a1)            /* t1 = [G3 R3 B2 G2] */
    lw                t2, 8(a1)            /* t2 = [B4 G4 R4 B3] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [B1 R2 R1 G1] */
    wsbh              t1, t1               /* t1 = [R3 G3 G2 B2] */
    wsbh              t2, t2               /* t2 = [G4 B4 B3 R4] */

    packrl.ph         t3, t1, t0           /* t3 = [G2 B2 B1 R2] */
    packrl.ph         t4, t0, t0           /* t4 = [R1 G1 B1 R2] */
    rotr              t3, t3, 16           /* t3 = [B1 R2 G2 B2] */
    or                t3, t3, t8           /* t3 = [FF R2 G2 B2] */
    srl               t4, t4, 8            /* t4 = [00 R1 G1 B1] */
    or                t4, t4, t8           /* t4 = [FF R1 G1 B1] */
    packrl.ph         t5, t2, t1           /* t5 = [B3 R4 R3 G3] */
    rotr              t5, t5, 24           /* t5 = [R4 R3 G3 B3] */
    or                t5, t5, t8           /* t5 = [FF R3 G3 B3] */
    rotr              t2, t2, 16           /* t2 = [B3 R4 G4 B4] */
    or                t2, t2, t8           /* t2 = [FF R4 G4 B4] */

    sw                t4, 0(a0)            /* pixel 1 */
    sw                t3, 4(a0)            /* pixel 2 */
    sw                t5, 8(a0)            /* pixel 3 */
    sw                t2, 12(a0)           /* pixel 4 */
    b                 0b
     addiu            a0, a0, 16

1:                                         /* source address modulo 4 == 1 */
    lbu               t6, 0(a1)            /* t6 = [00 00 00 R1] */
    lhu               t7, 1(a1)            /* t7 = [00 00 B1 G1] */
    sll               t6, t6, 16           /* t6 = [00 R1 00 00] */
    wsbh              t7, t7               /* t7 = [00 00 G1 B1] */
    or                t7, t6, t7           /* t7 = [00 R1 G1 B1] */
11:
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 3(a1)            /* t0 = [R3 B2 G2 R2] */
    lw                t1, 7(a1)            /* t1 = [G4 R4 B3 G3] */
    lw                t2, 11(a1)           /* t2 = [B5 G5 R5 B4] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [B2 R3 R2 G2] */
    wsbh              t1, t1               /* t1 = [R4 G4 G3 B3] */
    wsbh              t2, t2               /* t2 = [G5 B5 B4 R5] */

    packrl.ph         t3, t1, t0           /* t3 = [G3 B3 B2 R3] */
    packrl.ph         t4, t2, t1           /* t4 = [B4 R5 R4 G4] */
    rotr              t0, t0, 24           /* t0 = [R3 R2 G2 B2] */
    rotr              t3, t3, 16           /* t3 = [B2 R3 G3 B3] */
    rotr              t4, t4, 24           /* t4 = [R5 R4 G4 B4] */
    or                t7, t7, t8           /* t7 = [FF R1 G1 B1] */
    or                t0, t0, t8           /* t0 = [FF R2 G2 B2] */
    or                t3, t3, t8           /* t3 = [FF R3 G3 B3] */
    or                t4, t4, t8           /* t4 = [FF R4 G4 B4] */

    sw                t7, 0(a0)            /* pixel 1 */
    sw                t0, 4(a0)            /* pixel 2 */
    sw                t3, 8(a0)            /* pixel 3 */
    sw                t4, 12(a0)           /* pixel 4 */
    rotr              t7, t2, 16           /* t7 = [xx R5 G5 B5], carried to next pass */
    b                 11b
     addiu            a0, a0, 16

2:                                         /* source address modulo 4 == 2 */
    lhu               t7, 0(a1)            /* t7 = [00 00 G1 R1] */
    wsbh              t7, t7               /* t7 = [00 00 R1 G1] */
21:
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 2(a1)            /* t0 = [B2 G2 R2 B1] */
    lw                t1, 6(a1)            /* t1 = [R4 B3 G3 R3] */
    lw                t2, 10(a1)           /* t2 = [G5 R5 B4 G4] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [G2 B2 B1 R2] */
    wsbh              t1, t1               /* t1 = [B3 R4 R3 G3] */
    wsbh              t2, t2               /* t2 = [R5 G5 G4 B4] */

    precr_sra.ph.w    t7, t0, 0            /* t7 = [R1 G1 B1 R2] */
    rotr              t0, t0, 16           /* t0 = [B1 R2 G2 B2] */
    packrl.ph         t3, t2, t1           /* t3 = [G4 B4 B3 R4] */
    rotr              t1, t1, 24           /* t1 = [R4 R3 G3 B3] */
    srl               t7, t7, 8            /* t7 = [00 R1 G1 B1] */
    rotr              t3, t3, 16           /* t3 = [B3 R4 G4 B4] */
    or                t7, t7, t8           /* t7 = [FF R1 G1 B1] */
    or                t0, t0, t8           /* t0 = [FF R2 G2 B2] */
    or                t1, t1, t8           /* t1 = [FF R3 G3 B3] */
    or                t3, t3, t8           /* t3 = [FF R4 G4 B4] */

    sw                t7, 0(a0)            /* pixel 1 */
    sw                t0, 4(a0)            /* pixel 2 */
    sw                t1, 8(a0)            /* pixel 3 */
    sw                t3, 12(a0)           /* pixel 4 */
    srl               t7, t2, 16           /* t7 = [00 00 R5 G5], carried to next pass */
    b                 21b
     addiu            a0, a0, 16

3:                                         /* source address modulo 4 == 3 */
    lbu               t7, 0(a1)            /* t7 = [00 00 00 R1] */
31:
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 1(a1)            /* t0 = [G2 R2 B1 G1] */
    lw                t1, 5(a1)            /* t1 = [B3 G3 R3 B2] */
    lw                t2, 9(a1)            /* t2 = [R5 B4 G4 R4] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [R2 G2 G1 B1] */
    wsbh              t1, t1               /* t1 = [G3 B3 B2 R3] */
    wsbh              t2, t2               /* t2 = [B4 R5 R4 G4] */

    precr_sra.ph.w    t7, t0, 0            /* t7 = [xx R1 G1 B1] */
    packrl.ph         t3, t1, t0           /* t3 = [B2 R3 R2 G2] */
    rotr              t1, t1, 16           /* t1 = [B2 R3 G3 B3] */
    rotr              t4, t2, 24           /* t4 = [R5 R4 G4 B4] */
    rotr              t3, t3, 24           /* t3 = [R3 R2 G2 B2] */
    or                t7, t7, t8           /* t7 = [FF R1 G1 B1] */
    or                t3, t3, t8           /* t3 = [FF R2 G2 B2] */
    or                t1, t1, t8           /* t1 = [FF R3 G3 B3] */
    or                t4, t4, t8           /* t4 = [FF R4 G4 B4] */

    sw                t7, 0(a0)            /* pixel 1 */
    sw                t3, 4(a0)            /* pixel 2 */
    sw                t1, 8(a0)            /* pixel 3 */
    sw                t4, 12(a0)           /* pixel 4 */
    srl               t7, t2, 16           /* t7 = [00 00 xx R5], carried to next pass */
    b                 31b
     addiu            a0, a0, 16

4:
    beqz              a2, 6f               /* no leftover pixels */
     nop
5:                                         /* one pixel per pass, byte loads only */
    lbu               t0, 0(a1)            /* t0 = [00 00 00 R] */
    lbu               t1, 1(a1)            /* t1 = [00 00 00 G] */
    lbu               t2, 2(a1)            /* t2 = [00 00 00 B] */
    addiu             a1, a1, 3

    sll               t1, t1, 8            /* t1 = [00 00 G 00] */
    sll               t0, t0, 16           /* t0 = [00 R 00 00] */

    or                t2, t2, t1           /* t2 = [00 00 G B] */
    or                t2, t2, t0           /* t2 = [00 R G B] */
    or                t2, t2, t8           /* t2 = [FF R G B] */

    sw                t2, 0(a0)
    addiu             a2, a2, -1
    bnez              a2, 5b
     addiu            a0, a0, 4
6:
    jr                ra
     nop

END(pixman_composite_src_0888_8888_rev_asm_mips)
506
LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
/*
 * Convert a row of packed 3-byte pixels into 16-bit pixels.
 *
 * a0 - dst (r5g6b5)
 * a1 - src (b8g8r8)
 * a2 - w   (number of pixels)
 *
 * Rows of four or more pixels are converted four at a time (12 source
 * bytes in, 8 destination bytes out).  The low two bits of the source
 * address select one of four loops (labels 0, 11, 21 and 31) so that
 * every word load is made from a 4-byte aligned address.  In the last
 * three loops the bytes of the following pixel that have already been
 * loaded are carried in t5 into the next pass.  Leftover pixels are
 * converted one at a time with byte loads (label 5).
 *
 * v0 and v1 are handed to SAVE_REGS_ON_STACK on entry and to
 * RESTORE_REGS_FROM_STACK at label 6, which every path reaches; in
 * between they are passed as the last two arguments of
 * CONVERT_2x8888_TO_2x0565.
 *
 * The diagrams in the comments list a register's bytes from most to
 * least significant; Rn/Gn/Bn are the first, second and third byte of
 * source pixel n, xx is a don't-care byte.
 */

    SAVE_REGS_ON_STACK 0, v0, v1
    beqz              a2, 6f               /* w == 0: nothing to do */
     nop

    /* 5-, 6- and 5-bit field masks, repeated in both halfwords, for the
     * two-pixel conversion macro */
    li                t8, 0x001F001F
    li                t7, 0x07e007e0
    li                t6, 0xf800f800
    srl               t9, a2, 2            /* t9 = number of groups of 4 pixels */
    beqz              t9, 4f               /* fewer than 4 pixels: byte loop only */
     nop

    andi              t3, a1, 0x3          /* t3 = source address modulo 4 */
    li                t0, 0x1
    li                t1, 0x2
    li                t2, 0x3
    beq               t3, t0, 1f
     nop
    beq               t3, t1, 2f
     nop
    beq               t3, t2, 3f
     nop

0:                                         /* source address modulo 4 == 0 */
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 0(a1)            /* t0 = [R2 B1 G1 R1] */
    lw                t1, 4(a1)            /* t1 = [G3 R3 B2 G2] */
    lw                t2, 8(a1)            /* t2 = [B4 G4 R4 B3] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [B1 R2 R1 G1] */
    wsbh              t1, t1               /* t1 = [R3 G3 G2 B2] */
    wsbh              t2, t2               /* t2 = [G4 B4 B3 R4] */

    packrl.ph         t3, t1, t0           /* t3 = [G2 B2 B1 R2] */
    packrl.ph         t4, t0, t0           /* t4 = [R1 G1 B1 R2] */
    rotr              t3, t3, 16           /* t3 = [B1 R2 G2 B2] */
    srl               t4, t4, 8            /* t4 = [00 R1 G1 B1] */
    packrl.ph         t5, t2, t1           /* t5 = [B3 R4 R3 G3] */
    rotr              t5, t5, 24           /* t5 = [R4 R3 G3 B3] */
    rotr              t2, t2, 16           /* t2 = [B3 R4 G4 B4] */

    /* pixels 1+2 and 3+4, converted in place */
    CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1
    CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1

    sh                t4, 0(a0)            /* pixel 1 */
    sh                t3, 2(a0)            /* pixel 2 */
    sh                t5, 4(a0)            /* pixel 3 */
    sh                t2, 6(a0)            /* pixel 4 */
    b                 0b
     addiu            a0, a0, 8

1:                                         /* source address modulo 4 == 1 */
    lbu               t4, 0(a1)            /* t4 = [00 00 00 R1] */
    lhu               t5, 1(a1)            /* t5 = [00 00 B1 G1] */
    sll               t4, t4, 16           /* t4 = [00 R1 00 00] */
    wsbh              t5, t5               /* t5 = [00 00 G1 B1] */
    or                t5, t4, t5           /* t5 = [00 R1 G1 B1] */
11:
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 3(a1)            /* t0 = [R3 B2 G2 R2] */
    lw                t1, 7(a1)            /* t1 = [G4 R4 B3 G3] */
    lw                t2, 11(a1)           /* t2 = [B5 G5 R5 B4] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [B2 R3 R2 G2] */
    wsbh              t1, t1               /* t1 = [R4 G4 G3 B3] */
    wsbh              t2, t2               /* t2 = [G5 B5 B4 R5] */

    packrl.ph         t3, t1, t0           /* t3 = [G3 B3 B2 R3] */
    packrl.ph         t4, t2, t1           /* t4 = [B4 R5 R4 G4] */
    rotr              t0, t0, 24           /* t0 = [R3 R2 G2 B2] */
    rotr              t3, t3, 16           /* t3 = [B2 R3 G3 B3] */
    rotr              t4, t4, 24           /* t4 = [R5 R4 G4 B4] */

    /* pixels 1+2 and 3+4, converted in place */
    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
    CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1

    sh                t5, 0(a0)            /* pixel 1 */
    sh                t0, 2(a0)            /* pixel 2 */
    sh                t3, 4(a0)            /* pixel 3 */
    sh                t4, 6(a0)            /* pixel 4 */
    rotr              t5, t2, 16           /* t5 = [xx R5 G5 B5], carried to next pass */
    b                 11b
     addiu            a0, a0, 8

2:                                         /* source address modulo 4 == 2 */
    lhu               t5, 0(a1)            /* t5 = [00 00 G1 R1] */
    wsbh              t5, t5               /* t5 = [00 00 R1 G1] */
21:
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 2(a1)            /* t0 = [B2 G2 R2 B1] */
    lw                t1, 6(a1)            /* t1 = [R4 B3 G3 R3] */
    lw                t2, 10(a1)           /* t2 = [G5 R5 B4 G4] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [G2 B2 B1 R2] */
    wsbh              t1, t1               /* t1 = [B3 R4 R3 G3] */
    wsbh              t2, t2               /* t2 = [R5 G5 G4 B4] */

    precr_sra.ph.w    t5, t0, 0            /* t5 = [R1 G1 B1 R2] */
    rotr              t0, t0, 16           /* t0 = [B1 R2 G2 B2] */
    packrl.ph         t3, t2, t1           /* t3 = [G4 B4 B3 R4] */
    rotr              t1, t1, 24           /* t1 = [R4 R3 G3 B3] */
    srl               t5, t5, 8            /* t5 = [00 R1 G1 B1] */
    rotr              t3, t3, 16           /* t3 = [B3 R4 G4 B4] */

    /* pixels 1+2 and 3+4, converted in place */
    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
    CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1

    sh                t5, 0(a0)            /* pixel 1 */
    sh                t0, 2(a0)            /* pixel 2 */
    sh                t1, 4(a0)            /* pixel 3 */
    sh                t3, 6(a0)            /* pixel 4 */
    srl               t5, t2, 16           /* t5 = [00 00 R5 G5], carried to next pass */
    b                 21b
     addiu            a0, a0, 8

3:                                         /* source address modulo 4 == 3 */
    lbu               t5, 0(a1)            /* t5 = [00 00 00 R1] */
31:
    beqz              t9, 4f
     addiu            t9, t9, -1
    lw                t0, 1(a1)            /* t0 = [G2 R2 B1 G1] */
    lw                t1, 5(a1)            /* t1 = [B3 G3 R3 B2] */
    lw                t2, 9(a1)            /* t2 = [R5 B4 G4 R4] */

    addiu             a2, a2, -4
    addiu             a1, a1, 12

    wsbh              t0, t0               /* t0 = [R2 G2 G1 B1] */
    wsbh              t1, t1               /* t1 = [G3 B3 B2 R3] */
    wsbh              t2, t2               /* t2 = [B4 R5 R4 G4] */

    precr_sra.ph.w    t5, t0, 0            /* t5 = [xx R1 G1 B1] */
    packrl.ph         t3, t1, t0           /* t3 = [B2 R3 R2 G2] */
    rotr              t1, t1, 16           /* t1 = [B2 R3 G3 B3] */
    rotr              t4, t2, 24           /* t4 = [R5 R4 G4 B4] */
    rotr              t3, t3, 24           /* t3 = [R3 R2 G2 B2] */

    /* pixels 1+2 and 3+4, converted in place */
    CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1
    CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1

    sh                t5, 0(a0)            /* pixel 1 */
    sh                t3, 2(a0)            /* pixel 2 */
    sh                t1, 4(a0)            /* pixel 3 */
    sh                t4, 6(a0)            /* pixel 4 */
    srl               t5, t2, 16           /* t5 = [00 00 xx R5], carried to next pass */
    b                 31b
     addiu            a0, a0, 8

4:
    beqz              a2, 6f               /* no leftover pixels */
     nop
5:                                         /* one pixel per pass, byte loads only */
    lbu               t0, 0(a1)            /* t0 = [00 00 00 R] */
    lbu               t1, 1(a1)            /* t1 = [00 00 00 G] */
    lbu               t2, 2(a1)            /* t2 = [00 00 00 B] */
    addiu             a1, a1, 3

    sll               t1, t1, 8            /* t1 = [00 00 G 00] */
    sll               t0, t0, 16           /* t0 = [00 R 00 00] */

    or                t2, t2, t1           /* t2 = [00 00 G B] */
    or                t2, t2, t0           /* t2 = [00 R G B] */

    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5

    sh                t3, 0(a0)
    addiu             a2, a2, -1
    bnez              a2, 5b
     addiu            a0, a0, 2
6:
    RESTORE_REGS_FROM_STACK 0, v0, v1
    jr                ra
     nop

END(pixman_composite_src_0888_0565_rev_asm_mips)
700#endif
701
LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
/*
 * a0 - dst  (a8b8g8r8)
 * a1 - src  (a8r8g8b8)
 * a2 - w    (number of pixels)
 *
 * For each pixel the top byte (A) is extracted, the pixel is passed
 * together with A through the UN8x4-by-UN8 multiply macro, and the
 * result is repacked as described below.  Pixels are handled two at a
 * time, with a single-pixel step for an odd trailing pixel.
 *
 * Repacking, with x2 x1 x0 the low three bytes of the macro result
 * (bytes listed most significant first):
 *     sll 8, or A  -> [x2 x1 x0 A ]
 *     wsbh         -> [x1 x2 A  x0]
 *     rotr 16      -> [A  x0 x1 x2]
 *
 * v0 holds the constant 0x00ff00ff handed to the macros; it is passed
 * to SAVE_REGS_ON_STACK on entry and RESTORE_REGS_FROM_STACK on exit.
 */

    SAVE_REGS_ON_STACK 0, v0
    li       v0, 0x00ff00ff

    beqz     a2, 3f          /* w == 0: nothing to do */
     nop
    addiu    t1, a2, -1
    beqz     t1, 2f          /* w == 1: single-pixel step only */
     nop
1:
    lw       t0, 0(a1)       /* two source pixels */
    lw       t1, 4(a1)
    addiu    a2, a2, -2
    addiu    a1, a1, 8
    srl      t3, t1, 24      /* t3 = A of the second pixel */
    srl      t2, t0, 24      /* t2 = A of the first pixel */

    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9

    /* NOTE(review): the masking of t2/t3 suggests the macro may alter the
     * upper bits of its multiplier operands - confirm against the macro. */
    andi     t2, t2, 0xff
    andi     t3, t3, 0xff
    sll      t0, t0, 8
    sll      t1, t1, 8
    or       t0, t0, t2
    or       t1, t1, t3
    wsbh     t0, t0
    wsbh     t1, t1
    rotr     t0, t0, 16
    rotr     t1, t1, 16
    sw       t0, 0(a0)
    sw       t1, 4(a0)

    addiu    t2, a2, -1
    bgtz     t2, 1b          /* repeat while at least two pixels remain */
     addiu   a0, a0, 8
2:
    beqz     a2, 3f          /* even width: done */
     nop
    lw       t0, 0(a1)
    srl      t1, t0, 24      /* t1 = A */

    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5

    andi     t1, t1, 0xff
    sll      t0, t0, 8
    or       t0, t0, t1
    wsbh     t0, t0
    rotr     t0, t0, 16
    sw       t0, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, v0
    jr       ra
     nop

END(pixman_composite_src_pixbuf_8888_asm_mips)
763
LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
/*
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (a8r8g8b8)
 * a2 - w    (number of pixels)
 *
 * For each pixel the top byte (A) is extracted, the pixel is passed
 * together with A through the UN8x4-by-UN8 multiply macro, and the
 * result is repacked as described below.  Pixels are handled two at a
 * time, with a single-pixel step for an odd trailing pixel.
 *
 * Repacking, with x2 x1 x0 the low three bytes of the macro result
 * (bytes listed most significant first):
 *     sll 8, or A  -> [x2 x1 x0 A ]
 *     rotr 8       -> [A  x2 x1 x0]
 *
 * v0 holds the constant 0x00ff00ff handed to the macros; it is passed
 * to SAVE_REGS_ON_STACK on entry and RESTORE_REGS_FROM_STACK on exit.
 */

    SAVE_REGS_ON_STACK 0, v0
    li       v0, 0x00ff00ff

    beqz     a2, 3f          /* w == 0: nothing to do */
     nop
    addiu    t1, a2, -1
    beqz     t1, 2f          /* w == 1: single-pixel step only */
     nop
1:
    lw       t0, 0(a1)       /* two source pixels */
    lw       t1, 4(a1)
    addiu    a2, a2, -2
    addiu    a1, a1, 8
    srl      t3, t1, 24      /* t3 = A of the second pixel */
    srl      t2, t0, 24      /* t2 = A of the first pixel */

    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9

    /* NOTE(review): the masking of t2/t3 suggests the macro may alter the
     * upper bits of its multiplier operands - confirm against the macro. */
    andi     t2, t2, 0xff
    andi     t3, t3, 0xff
    sll      t0, t0, 8
    sll      t1, t1, 8
    or       t0, t0, t2
    or       t1, t1, t3
    rotr     t0, t0, 8
    rotr     t1, t1, 8
    sw       t0, 0(a0)
    sw       t1, 4(a0)

    addiu    t2, a2, -1
    bgtz     t2, 1b          /* repeat while at least two pixels remain */
     addiu   a0, a0, 8
2:
    beqz     a2, 3f          /* even width: done */
     nop
    lw       t0, 0(a1)
    srl      t1, t0, 24      /* t1 = A */

    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5

    andi     t1, t1, 0xff
    sll      t0, t0, 8
    or       t0, t0, t1
    rotr     t0, t0, 8
    sw       t0, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, v0
    jr       ra
     nop

END(pixman_composite_src_rpixbuf_8888_asm_mips)
822
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
/*
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w
 *
 * Each destination pixel is the constant source passed, together with
 * the matching mask byte, through the UN8x4-by-UN8 multiply macro.
 * Pixels are handled two at a time, with a single-pixel step for an odd
 * trailing pixel (or a row of exactly one pixel).
 *
 * v0 holds the constant 0x00ff00ff handed to the macros; it is passed
 * to SAVE_REGS_ON_STACK on entry and RESTORE_REGS_FROM_STACK on exit.
 */


    SAVE_REGS_ON_STACK 0, v0
    li       v0, 0x00ff00ff

    beqz     a3, 3f          /* w == 0: nothing to do */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f          /* w == 1: single-pixel step only */
     nop

1:
                       /* a1 = source      (32bit constant) */
    lbu      t0, 0(a2) /* t0 = mask        (a8) */
    lbu      t1, 1(a2) /* t1 = mask        (a8) */
    addiu    a2, a2, 2

    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9

    sw       t2, 0(a0)
    sw       t3, 4(a0)
    addiu    a3, a3, -2
    addiu    t2, a3, -1
    bgtz     t2, 1b          /* repeat while at least two pixels remain */
     addiu   a0, a0, 8

    beqz     a3, 3f          /* even width: done */
     nop

2:
    /* Last pixel: a0, a2 and a3 are not read again before the return,
     * so they are left as they are. */
    lbu      t0, 0(a2)

    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5

    sw       t1, 0(a0)

3:
    RESTORE_REGS_FROM_STACK 0, v0
    jr       ra
     nop

END(pixman_composite_src_n_8_8888_asm_mips)
875
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
/*
 * a0 - dst  (a8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w    (number of pixels)
 *
 * Every destination byte is computed from the product x of the mask
 * byte and the top byte of src, as
 *     t = x + (rounded x >> 8, low 8 bits);  result = rounded t >> 8
 * Four pixels are processed per pass of the main loop using packed
 * halfword arithmetic; the remaining pixels are done one at a time.
 */

    li                t9, 0x00ff00ff       /* keeps the low byte of each halfword */
    beqz              a3, 4f               /* w == 0: nothing to do */
     nop
    srl               t7, a3, 2            /* t7 = number of groups of 4 pixels */
    beqz              t7, 2f               /* fewer than 4 pixels: single-pixel loop only */
     nop

    srl               t8, a1, 24           /* t8 = top byte of src */
    replv.ph          t8, t8               /* ... replicated into both halfwords */

1:
    beqz              t7, 2f
     addiu            t7, t7, -1
    lbu               t0, 0(a2)            /* four mask bytes m0..m3 */
    lbu               t1, 1(a2)
    lbu               t2, 2(a2)
    lbu               t3, 3(a2)

    addiu             a2, a2, 4
    addiu             a3, a3, -4

    precr_sra.ph.w    t3, t2, 0            /* t3 = [00 m3 00 m2] */
    precr_sra.ph.w    t1, t0, 0            /* t1 = [00 m1 00 m0] */
    precr.qb.ph       t0, t3, t1           /* t0 = [m3 m2 m1 m0] */

    muleu_s.ph.qbl    t2, t0, t8           /* t2 = products for m3, m2 */
    muleu_s.ph.qbr    t3, t0, t8           /* t3 = products for m1, m0 */
    shra_r.ph         t4, t2, 8
    shra_r.ph         t5, t3, 8
    and               t4, t4, t9
    and               t5, t5, t9
    addq.ph           t2, t2, t4
    addq.ph           t3, t3, t5
    shra_r.ph         t2, t2, 8
    shra_r.ph         t3, t3, 8
    precr.qb.ph       t2, t2, t3           /* pack the four result bytes into t2 */

    sb                t2, 0(a0)            /* byte stores, lowest byte first */
    srl               t2, t2, 8
    sb                t2, 1(a0)
    srl               t2, t2, 8
    sb                t2, 2(a0)
    srl               t2, t2, 8
    sb                t2, 3(a0)
    b                 1b
     addiu            a0, a0, 4

2:
    beqz              a3, 4f               /* no leftover pixels */
     nop
    srl               t8, a1, 24           /* t8 = top byte of src (not replicated) */
3:
    lbu               t0, 0(a2)
    addiu             a2, a2, 1

    mul               t2, t0, t8
    shra_r.ph         t3, t2, 8
    andi              t3, t3, 0x00ff
    addq.ph           t2, t2, t3
    shra_r.ph         t2, t2, 8

    sb                t2, 0(a0)
    addiu             a3, a3, -1
    bnez              a3, 3b
     addiu            a0, a0, 1

4:
    jr                ra
     nop

END(pixman_composite_src_n_8_8_asm_mips)
955
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
/*
 * Component-alpha OVER of a solid colour onto one a8r8g8b8 scanline.
 *
 *   a0 - dst  (a8r8g8b8)
 *   a1 - src  (32bit constant)
 *   a2 - mask (a8r8g8b8)
 *   a3 - w
 *
 * The loop at 0: consumes two mask words / two dst pixels per pass; one
 * left-over pixel is finished at 4:.  Three cases per mask word:
 *   - mask == 0          : dst is not written;
 *   - mask == 0xffffffff : dst = src when srca == 0xff, otherwise dst is
 *                          scaled by (~src >> 24) and src is added with
 *                          saturation;
 *   - anything else      : general path through the UN8x4 multiply macros.
 * A pair takes a fast path only when BOTH words qualify; mixed pairs use
 * the general path.
 *
 * SAVE_REGS_ON_STACK / RESTORE_REGS_FROM_STACK and the MIPS_* macros are
 * defined in pixman-mips-dspr2-asm.h; their operand lists are kept exactly
 * as they were.  Notes on macro data flow follow the macro names and
 * operand positions only.
 */

    beqz         a3, 8f               /* w == 0: return before saving anything */
     nop
    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5

    li           t9, 0x00ff00ff       /* constant handed to every multiply macro */
    li           t6, 0xff             /* compared against srca */
    srl          t8, a1, 24           /* t8 = srca */
    addiu        t7, zero, -1         /* t7 = 0xffffffff */

    addiu        t1, a3, -1
    beqz         t1, 4f               /* w == 1: only the single-pixel tail */
     nop

0:
    addiu        a3, a3, -2           /* w -= 2 */
    lw           t0, 0(a2)            /* t0 = first mask word */
    lw           t1, 4(a2)            /* t1 = second mask word */
    or           t2, t0, t1
    beqz         t2, 3f               /* both masks zero: nothing to store */
     addiu       a2, a2, 8            /* (delay slot) mask += 2 pixels */
    and          t2, t0, t1
    beq          t2, t7, 1f           /* both masks 0xffffffff */
     nop

    /* general case, at least one mask word is neither 0 nor all ones */
    lw           t2, 0(a0)            /* t2 = first dst pixel */
    lw           t3, 4(a0)            /* t3 = second dst pixel */
    /* t4/t5 <- src x mask */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
    /* t0/t1 <- mask x srca */
    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
    not          t0, t0
    not          t1, t1
    /* t2/t3 <- dst x ~(mask x srca) */
    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb    t2, t4, t2           /* saturating per-byte add */
    addu_s.qb    t3, t5, t3
    sw           t2, 0(a0)
    sw           t3, 4(a0)
    addiu        t1, a3, -1
    bgtz         t1, 0b               /* at least two pixels left */
     addiu       a0, a0, 8            /* (delay slot) dst += 2 pixels */
    b            4f
     nop
1:
    /* both mask words are 0xffffffff */
    beq          t8, t6, 2f           /* t8 == 0xff: plain copy of src */
     nop
    lw           t2, 0(a0)            /* t2 = first dst pixel */
    lw           t3, 4(a0)            /* t3 = second dst pixel */
    not          t0, a1
    not          t1, a1
    srl          t0, t0, 24           /* t0 = (~src) >> 24 */
    srl          t1, t1, 24           /* t1 = (~src) >> 24 */
    /* t2/t3 <- dst x t0/t1 */
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb    t2, a1, t2
    addu_s.qb    t3, a1, t3
    sw           t2, 0(a0)
    sw           t3, 4(a0)
    addiu        t1, a3, -1
    bgtz         t1, 0b
     addiu       a0, a0, 8            /* (delay slot) dst += 2 pixels */
    b            4f
     nop
2:
    sw           a1, 0(a0)            /* dst[0] = src */
    sw           a1, 4(a0)            /* dst[1] = src */
3:
    addiu        t1, a3, -1
    bgtz         t1, 0b
     addiu       a0, a0, 8            /* (delay slot) dst += 2 pixels */

4:
    /* tail: a3 is 0 or 1 here */
    beqz         a3, 7f
     nop
    lw           t0, 0(a2)            /* t0 = mask */
    beqz         t0, 7f               /* mask == 0: dst untouched */
     nop
    beq          t0, t7, 5f           /* mask == 0xffffffff */
     nop

    /* general single-pixel case */
    lw           t1, 0(a0)            /* t1 = dst */
    MIPS_UN8x4_MUL_UN8x4  a1, t0, t2, t9, t3, t4, t5, s0
    MIPS_UN8x4_MUL_UN8    t0, t8, t0, t9, t3, t4, t5
    not          t0, t0
    MIPS_UN8x4_MUL_UN8x4  t1, t0, t1, t9, t3, t4, t5, s0
    addu_s.qb    t1, t2, t1
    sw           t1, 0(a0)
    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
    j            ra
     nop
5:
    /* single pixel, mask == 0xffffffff */
    beq          t8, t6, 6f           /* t8 == 0xff: plain copy of src */
     nop
    lw           t1, 0(a0)            /* t1 = dst */
    not          t0, a1
    srl          t0, t0, 24           /* t0 = (~src) >> 24 */
    MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4
    addu_s.qb    t1, a1, t1
    sw           t1, 0(a0)
    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
    j            ra
     nop
6:
    sw           a1, 0(a0)            /* dst = src */
7:
    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
8:
    j            ra
     nop

END(pixman_composite_over_n_8888_8888_ca_asm_mips)
1075
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
/*
 * Component-alpha OVER of a solid colour onto one r5g6b5 scanline.
 *
 *   a0 - dst  (r5g6b5)
 *   a1 - src  (32bit constant)
 *   a2 - mask (a8r8g8b8)
 *   a3 - w
 *
 * Same control flow as the a8r8g8b8 destination variant: two pixels per
 * pass in the loop at 0:, an optional last pixel at 4:, with fast paths
 * for mask == 0 (no store) and mask == 0xffffffff.  Destination pixels
 * are loaded as halfwords and pass through the CONVERT_* macros before
 * and after the arithmetic.
 *
 * All macros come from pixman-mips-dspr2-asm.h; their operand lists are
 * unchanged.  Notes on macro data flow follow macro names and operand
 * positions only.
 */

    beqz         a3, 8f               /* w == 0: return before saving anything */
     nop
    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8

    li           t9, 0x00ff00ff       /* constant handed to the multiply macros */
    li           s6, 0xf800f800       /* s6..s8: constants for the 565 converters */
    li           s7, 0x07e007e0
    li           s8, 0x001F001F
    li           t6, 0xff             /* compared against srca */
    addiu        t7, zero, -1         /* t7 = 0xffffffff */
    srl          t8, a1, 24           /* t8 = srca */

    addiu        t1, a3, -1
    beqz         t1, 4f               /* w == 1: only the single-pixel tail */
     nop

0:
    addiu        a3, a3, -2           /* w -= 2 */
    lw           t0, 0(a2)            /* t0 = first mask word */
    lw           t1, 4(a2)            /* t1 = second mask word */
    or           t2, t0, t1
    beqz         t2, 3f               /* both masks zero: nothing to store */
     addiu       a2, a2, 8            /* (delay slot) mask += 2 pixels */
    and          t2, t0, t1
    beq          t2, t7, 1f           /* both masks 0xffffffff */
     nop

    /* general case */
    lhu          t2, 0(a0)            /* t2 = first dst pixel */
    lhu          t3, 2(a0)            /* t3 = second dst pixel */
    /* t4/t5 <- src x mask */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
    /* t0/t1 <- mask x srca */
    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
    not          t0, t0
    not          t1, t1
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
    /* t2/t3 <- dst x ~(mask x srca) */
    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb    t2, t4, t2           /* saturating per-byte add */
    addu_s.qb    t3, t5, t3
    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
    sh           t2, 0(a0)
    sh           t3, 2(a0)
    addiu        t1, a3, -1
    bgtz         t1, 0b               /* at least two pixels left */
     addiu       a0, a0, 4            /* (delay slot) dst += 2 pixels */
    b            4f
     nop
1:
    /* both mask words are 0xffffffff */
    beq          t8, t6, 2f           /* t8 == 0xff: store converted src */
     nop
    lhu          t2, 0(a0)            /* t2 = first dst pixel */
    lhu          t3, 2(a0)            /* t3 = second dst pixel */
    not          t0, a1
    not          t1, a1
    srl          t0, t0, 24           /* t0 = (~src) >> 24 */
    srl          t1, t1, 24           /* t1 = (~src) >> 24 */
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
    MIPS_2xUN8x4_MUL_2xUN8   t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb    t2, a1, t2
    addu_s.qb    t3, a1, t3
    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
    sh           t2, 0(a0)
    sh           t3, 2(a0)
    addiu        t1, a3, -1
    bgtz         t1, 0b
     addiu       a0, a0, 4            /* (delay slot) dst += 2 pixels */
    b            4f
     nop
2:
    CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1
    sh           t2, 0(a0)            /* both dst pixels = converted src */
    sh           t2, 2(a0)
3:
    addiu        t1, a3, -1
    bgtz         t1, 0b
     addiu       a0, a0, 4            /* (delay slot) dst += 2 pixels */

4:
    /* tail: a3 is 0 or 1 here */
    beqz         a3, 7f
     nop
    lw           t0, 0(a2)            /* t0 = mask */
    beqz         t0, 7f               /* mask == 0: dst untouched */
     nop
    beq          t0, t7, 5f           /* mask == 0xffffffff */
     nop

    /* general single-pixel case */
    lhu          t1, 0(a0)            /* t1 = dst */
    MIPS_UN8x4_MUL_UN8x4     a1, t0, t2, t9, t3, t4, t5, s0
    MIPS_UN8x4_MUL_UN8       t0, t8, t0, t9, t3, t4, t5
    not          t0, t0
    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
    MIPS_UN8x4_MUL_UN8x4     s1, t0, s1, t9, t3, t4, t5, s0
    addu_s.qb    s1, t2, s1
    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
    sh           t1, 0(a0)
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
    j            ra
     nop
5:
    /* single pixel, mask == 0xffffffff */
    beq          t8, t6, 6f           /* t8 == 0xff: store converted src */
     nop
    lhu          t1, 0(a0)            /* t1 = dst */
    not          t0, a1
    srl          t0, t0, 24           /* t0 = (~src) >> 24 */
    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
    MIPS_UN8x4_MUL_UN8       s1, t0, s1, t9, t2, t3, t4
    addu_s.qb    s1, a1, s1
    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
    sh           t1, 0(a0)
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
    j            ra
     nop
6:
    CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2
    sh           t1, 0(a0)            /* dst = converted src */
7:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
8:
    j            ra
     nop

END(pixman_composite_over_n_8888_0565_ca_asm_mips)
1208
LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
/*
 * OVER of a solid colour through an a8 mask onto an a8 scanline.
 *
 *   a0 - dst  (a8)
 *   a1 - src  (32bit constant)
 *   a2 - mask (a8)
 *   a3 - w
 *
 * Per pixel:  m' = mask * (src >> 24) / 255
 *             dst = sat (m' + dst * (255 - m') / 255)
 * Each "/ 255" is the two-step sequence x += (x + 0x80) >> 8 followed by
 * (x + 0x80) >> 8 (shra_r.ph rounds before shifting).
 *
 * The loop at 0: handles four pixels per pass with paired-halfword DSP
 * operations; the loop at 2: handles the remaining w & 3 pixels one at a
 * time.  Bytes are loaded and stored individually with lbu/sb.
 */

    SAVE_REGS_ON_STACK 0, v0
    li                t9, 0x00ff00ff  /* keeps the low byte of each halfword */
    beqz              a3, 3f          /* w == 0 */
     nop
    srl               v0, a3, 2       /* v0 = number of 4-pixel groups */
    beqz              v0, 1f          /* fewer than 4 pixels: tail loop only */
     nop

    srl               t8, a1, 24      /* t8 = srca */
    replv.ph          t8, t8          /* srca replicated into both halfwords */

0:
    beqz              v0, 1f          /* no groups left */
     addiu            v0, v0, -1      /* (delay slot) one group consumed */
    lbu               t0, 0(a2)       /* t0..t3 = four mask bytes */
    lbu               t1, 1(a2)
    lbu               t2, 2(a2)
    lbu               t3, 3(a2)
    lbu               t4, 0(a0)       /* t4..t7 = four dst bytes */
    lbu               t5, 1(a0)
    lbu               t6, 2(a0)
    lbu               t7, 3(a0)

    addiu             a2, a2, 4       /* mask += 4 */

    /* merge pairs of bytes into halfword pairs ... */
    precr_sra.ph.w    t1, t0, 0       /* t1 = m1 : m0 */
    precr_sra.ph.w    t3, t2, 0       /* t3 = m3 : m2 */
    precr_sra.ph.w    t5, t4, 0       /* t5 = d1 : d0 */
    precr_sra.ph.w    t7, t6, 0       /* t7 = d3 : d2 */

    /* ... and then into one word of four bytes each */
    precr.qb.ph       t0, t3, t1      /* t0 = m3 m2 m1 m0 */
    precr.qb.ph       t1, t7, t5      /* t1 = d3 d2 d1 d0 */

    /* t0 = mask * srca / 255, four lanes */
    muleu_s.ph.qbl    t2, t0, t8      /* upper two lanes */
    muleu_s.ph.qbr    t3, t0, t8      /* lower two lanes */
    shra_r.ph         t4, t2, 8
    shra_r.ph         t5, t3, 8
    and               t4, t4, t9
    and               t5, t5, t9
    addq.ph           t2, t2, t4
    addq.ph           t3, t3, t5
    shra_r.ph         t2, t2, 8
    shra_r.ph         t3, t3, 8
    precr.qb.ph       t0, t2, t3
    not               t6, t0          /* t6 = 255 - m' in each byte */

    preceu.ph.qbl     t7, t6          /* upper two bytes widened to halfwords */
    preceu.ph.qbr     t6, t6          /* lower two bytes widened to halfwords */

    /* t1 = dst * (255 - m') / 255, four lanes */
    muleu_s.ph.qbl    t2, t1, t7
    muleu_s.ph.qbr    t3, t1, t6
    shra_r.ph         t4, t2, 8
    shra_r.ph         t5, t3, 8
    and               t4, t4, t9
    and               t5, t5, t9
    addq.ph           t2, t2, t4
    addq.ph           t3, t3, t5
    shra_r.ph         t2, t2, 8
    shra_r.ph         t3, t3, 8
    precr.qb.ph       t1, t2, t3

    addu_s.qb         t2, t0, t1      /* saturating per-byte sum */

    sb                t2, 0(a0)       /* write the four result bytes */
    srl               t2, t2, 8
    sb                t2, 1(a0)
    srl               t2, t2, 8
    sb                t2, 2(a0)
    srl               t2, t2, 8
    sb                t2, 3(a0)
    addiu             a3, a3, -4      /* w -= 4 */
    b                 0b
     addiu            a0, a0, 4       /* (delay slot) dst += 4 */

1:
    beqz              a3, 3f          /* nothing left for the tail */
     nop
    srl               t8, a1, 24      /* t8 = srca again, not replicated */
2:
    lbu               t0, 0(a2)       /* t0 = mask */
    lbu               t1, 0(a0)       /* t1 = dst */
    addiu             a2, a2, 1       /* mask += 1 */

    /* t2 = mask * srca / 255 */
    mul               t2, t0, t8
    shra_r.ph         t3, t2, 8
    andi              t3, t3, 0x00ff
    addq.ph           t2, t2, t3
    shra_r.ph         t2, t2, 8
    not               t3, t2
    andi              t3, t3, 0x00ff  /* t3 = 255 - m' */


    /* t4 = dst * (255 - m') / 255 */
    mul               t4, t1, t3
    shra_r.ph         t5, t4, 8
    andi              t5, t5, 0x00ff
    addq.ph           t4, t4, t5
    shra_r.ph         t4, t4, 8
    andi              t4, t4, 0x00ff

    addu_s.qb         t2, t2, t4      /* only the low byte is stored below */
    sb                t2, 0(a0)
    addiu             a3, a3, -1      /* w -= 1 */
    bnez              a3, 2b
     addiu            a0, a0, 1       /* (delay slot) dst += 1 */

3:
    RESTORE_REGS_FROM_STACK 0, v0
    j                 ra
     nop

END(pixman_composite_over_n_8_8_asm_mips)
1328
LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
/*
 * OVER of a solid colour through an a8 mask onto an a8r8g8b8 scanline.
 *
 *   a0 - dst  (a8r8g8b8)
 *   a1 - src  (32bit constant)
 *   a2 - mask (a8)
 *   a3 - w
 *
 * Two pixel-pair loops exist, selected once by srca (src >> 24):
 *   1: srca != 0xff
 *   2: srca == 0xff, where a pair with both masks 0xff is a plain store
 *      of src
 * Both skip a pair whose two mask bytes are zero.  A single trailing
 * pixel is handled at 3:.
 *
 * Macros come from pixman-mips-dspr2-asm.h and are invoked with unchanged
 * operands; notes on their data flow follow names and operand positions.
 */

    SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
    beqz      a3, 4f                 /* w == 0 */
     nop
    li        t5, 0xff               /* "fully opaque" byte value */
    li        t4, 0x00ff00ff         /* constant handed to the multiply macros */
    addiu     t0, a3, -1
    beqz      t0, 3f                 /* w == 1: only the single-pixel tail */
     srl      t6, a1, 24             /* (delay slot) t6 = srca */
    not       s4, a1
    beq       t5, t6, 2f             /* srca == 0xff: use the second loop */
     srl      s4, s4, 24             /* (delay slot) s4 = (~src) >> 24 */
1:
    /* pair loop for srca != 0xff */
    lbu       t0, 0(a2)              /* t0 = first mask byte */
    lbu       t1, 1(a2)              /* t1 = second mask byte */
    or        t2, t0, t1
    beqz      t2, 111f               /* both masks zero: skip the pair */
     addiu    a2, a2, 2              /* (delay slot) mask += 2 */
    and       t3, t0, t1

    lw        t2, 0(a0)              /* t2 = first dst pixel */
    beq       t3, t5, 11f            /* both masks 0xff (tested before the slot) */
     lw       t3, 4(a0)              /* (delay slot) t3 = second dst pixel */

    /* s0/s1 <- src x mask */
    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
    not       s2, s0
    not       s3, s1
    srl       s2, s2, 24             /* s2 = (~s0) >> 24 */
    srl       s3, s3, 24             /* s3 = (~s1) >> 24 */
    /* t2/t3 <- dst x s2/s3 */
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
    addu_s.qb s2, t2, s0             /* saturating per-byte add */
    addu_s.qb s3, t3, s1
    sw        s2, 0(a0)
    b         111f
     sw       s3, 4(a0)              /* (delay slot) */
11:
    /* both masks 0xff: dst x s4 + src */
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
    addu_s.qb s2, t2, a1
    addu_s.qb s3, t3, a1
    sw        s2, 0(a0)
    sw        s3, 4(a0)

111:
    addiu     a3, a3, -2             /* w -= 2 */
    addiu     t0, a3, -1
    bgtz      t0, 1b                 /* at least two pixels left */
     addiu    a0, a0, 8              /* (delay slot) dst += 2 pixels */
    b         3f
     nop
2:
    /* pair loop for srca == 0xff */
    lbu       t0, 0(a2)              /* t0 = first mask byte */
    lbu       t1, 1(a2)              /* t1 = second mask byte */
    or        t2, t0, t1
    beqz      t2, 222f               /* both masks zero: skip the pair */
     addiu    a2, a2, 2              /* (delay slot) mask += 2 */
    and       t3, t0, t1
    beq       t3, t5, 22f            /* both masks 0xff: store src directly */
     nop
    lw        t2, 0(a0)              /* t2 = first dst pixel */
    lw        t3, 4(a0)              /* t3 = second dst pixel */

    OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
                           t6, t7, t4, t8, t9, s0, s1, s2, s3
    sw        t6, 0(a0)
    b         222f
     sw        t7, 4(a0)             /* (delay slot) */
22:
    sw        a1, 0(a0)
    sw        a1, 4(a0)
222:
    addiu     a3, a3, -2             /* w -= 2 */
    addiu     t0, a3, -1
    bgtz      t0, 2b                 /* at least two pixels left */
     addiu    a0, a0, 8              /* (delay slot) dst += 2 pixels */
3:
    /* optional last pixel */
    blez      a3, 4f
     nop
    lbu       t0, 0(a2)              /* t0 = mask */
    beqz      t0, 4f                 /* mask == 0: dst untouched */
     addiu    a2, a2, 1              /* (delay slot) mask += 1 */
    move      t3, a1                 /* t3 = src, used as is when mask == 0xff */
    beq       t0, t5, 31f
     lw       t1, 0(a0)              /* (delay slot) t1 = dst */

    /* t3 <- src x mask */
    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
31:
    not       t2, t3
    srl       t2, t2, 24             /* t2 = (~t3) >> 24 */
    /* t1 <- dst x t2 */
    MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
    addu_s.qb t2, t1, t3
    sw        t2, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
    j         ra
     nop

END(pixman_composite_over_n_8_8888_asm_mips)
1436
LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
/*
 * OVER of a solid colour through an a8 mask onto an r5g6b5 scanline.
 *
 *   a0 - dst  (r5g6b5)
 *   a1 - src  (32bit constant)
 *   a2 - mask (a8)
 *   a3 - w
 *
 * Two pixel-pair loops exist, selected once by srca (src >> 24):
 *   1:  srca != 0xff
 *   21: srca == 0xff; src is converted to 565 once at 2: and kept in s0
 * Pairs whose two mask bytes are zero are skipped.  A single trailing
 * pixel is handled at 3:.
 *
 * Macros come from pixman-mips-dspr2-asm.h and are invoked with unchanged
 * operands; notes on their data flow follow names and operand positions.
 */
    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
    beqz     a3, 4f                 /* w == 0 */
     nop
    li       t5, 0xff               /* "fully opaque" byte value */
    li       t4, 0x00ff00ff         /* constant handed to the multiply macros */
    li       t6, 0xf800f800         /* t6..t8: constants for the 565 converters */
    li       t7, 0x07e007e0
    li       t8, 0x001F001F
    addiu    t1, a3, -1
    beqz     t1, 3f                 /* w == 1: only the single-pixel tail */
     srl     t0, a1, 24             /* (delay slot) t0 = srca */
    not      v0, a1
    beq      t0, t5, 2f             /* srca == 0xff: use the second loop */
     srl     v0, v0, 24             /* (delay slot) v0 = (~src) >> 24 */
1:
    /* pair loop for srca != 0xff */
    lbu      t0, 0(a2)              /* t0 = first mask byte */
    lbu      t1, 1(a2)              /* t1 = second mask byte */
    or       t2, t0, t1
    beqz     t2, 111f               /* both masks zero: skip the pair */
     addiu   a2, a2, 2              /* (delay slot) mask += 2 */
    lhu      t2, 0(a0)              /* t2 = first dst pixel */
    lhu      t3, 2(a0)              /* t3 = second dst pixel */
    /* s0/s1 <- dst converted to 8888 */
    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
    and      t9, t0, t1
    beq      t9, t5, 11f            /* both masks 0xff */
     nop

    /* s2/s3 <- src x mask */
    MIPS_2xUN8x4_MUL_2xUN8   a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
    not      s4, s2
    not      s5, s3
    srl      s4, s4, 24             /* s4 = (~s2) >> 24 */
    srl      s5, s5, 24             /* s5 = (~s3) >> 24 */
    /* s0/s1 <- dst x s4/s5 */
    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
    addu_s.qb                s4, s2, s0
    addu_s.qb                s5, s3, s1
    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
    sh       t2, 0(a0)
    b        111f
     sh      t3, 2(a0)              /* (delay slot) */
11:
    /* both masks 0xff: dst x v0 + src */
    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
    addu_s.qb                s4, a1, s0
    addu_s.qb                s5, a1, s1
    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
    sh       t2, 0(a0)
    sh       t3, 2(a0)
111:
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t0, a3, -1
    bgtz     t0, 1b                 /* at least two pixels left */
     addiu   a0, a0, 4              /* (delay slot) dst += 2 pixels */
    b        3f
     nop
2:
    /* srca == 0xff: s0 <- src converted to 565, done once */
    CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
21:
    lbu      t0, 0(a2)              /* t0 = first mask byte */
    lbu      t1, 1(a2)              /* t1 = second mask byte */
    or       t2, t0, t1
    beqz     t2, 222f               /* both masks zero: skip the pair */
     addiu   a2, a2, 2              /* (delay slot) mask += 2 */
    and      t9, t0, t1
    move     s2, s0                 /* default result: converted src */
    beq      t9, t5, 22f            /* if (t0 == 0xff) && (t1 == 0xff) */
     move    s3, s0                 /* (delay slot) default result, 2nd pixel */
    lhu      t2, 0(a0)              /* t2 = first dst pixel */
    lhu      t3, 2(a0)              /* t3 = second dst pixel */

    CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
    OVER_2x8888_2x8_2x8888   a1, a1, t0, t1, s2, s3, \
                             t2, t3, t4, t9, s4, s5, s6, s7, s8
    CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
22:
    sh       s2, 0(a0)
    sh       s3, 2(a0)
222:
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t0, a3, -1
    bgtz     t0, 21b                /* at least two pixels left */
     addiu   a0, a0, 4              /* (delay slot) dst += 2 pixels */
3:
    /* optional last pixel */
    blez      a3, 4f
     nop
    lbu      t0, 0(a2)              /* t0 = mask */
    beqz     t0, 4f                 /* mask == 0: dst untouched */
     nop
    lhu      t1, 0(a0)              /* t1 = dst */
    /* t2 <- dst converted to 8888 (t3 and t7 are scratch from here on) */
    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
    beq      t0, t5, 31f            /* mask == 0xff: use src unchanged */
     move    t3, a1                 /* (delay slot) t3 = src */

    /* t3 <- src x mask */
    MIPS_UN8x4_MUL_UN8       a1, t0, t3, t4, t7, t8, t9
31:
    not      t6, t3
    srl      t6, t6, 24             /* t6 = (~t3) >> 24 */
    /* t2 <- dst x t6 */
    MIPS_UN8x4_MUL_UN8       t2, t6, t2, t4, t7, t8, t9
    addu_s.qb                t1, t2, t3
    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
    sh       t2, 0(a0)
4:
    RESTORE_REGS_FROM_STACK  24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
    j        ra
     nop

END(pixman_composite_over_n_8_0565_asm_mips)
1552
LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
/*
 * OVER of an a8r8g8b8 source, scaled by a constant mask, onto an
 * a8r8g8b8 scanline.
 *
 *   a0 - dst  (a8r8g8b8)
 *   a1 - src  (a8r8g8b8)
 *   a2 - mask (32bit constant)
 *   a3 - w
 *
 * Only the top byte of the mask is used (a2 is shifted right by 24 once).
 * Pixels are processed in pairs through OVER_2x8888_2x8_2x8888; a single
 * remaining pixel goes through OVER_8888_8_8888.  Both macros are defined
 * in pixman-mips-dspr2-asm.h and are invoked with unchanged operands.
 */

    SAVE_REGS_ON_STACK 0, s0
    li       t4, 0x00ff00ff         /* constant handed to the OVER macros */
    beqz     a3, 3f                 /* w == 0 */
     nop
    srl      a2, a2, 24             /* a2 = mask >> 24 */
    addiu    t1, a3, -1
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop

1:
    lw       t2, 0(a0)              /* t2 = first dst pixel */
    lw       t3, 4(a0)              /* t3 = second dst pixel */
    lw       t0, 0(a1)              /* t0 = first src pixel */
    lw       t1, 4(a1)              /* t1 = second src pixel */
    addiu    a1, a1, 8              /* src += 2 pixels */

    /* results arrive in t5/t6 */
    OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
                           t5, t6, t4, t7, t8, t9, t0, t1, s0

    sw       t5, 0(a0)
    sw       t6, 4(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 8              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lw       t0, 0(a1)              /* t0 = src pixel */
    lw       t1, 0(a0)              /* t1 = dst pixel */

    /* result arrives in t3 */
    OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8

    sw       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0
    j        ra
     nop

END(pixman_composite_over_8888_n_8888_asm_mips)
1603
LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
/*
 * OVER of an a8r8g8b8 source, scaled by a constant mask, onto an r5g6b5
 * scanline.
 *
 *   a0 - dst  (r5g6b5)
 *   a1 - src  (a8r8g8b8)
 *   a2 - mask (32bit constant)
 *   a3 - w
 *
 * Only the top byte of the mask is used (a2 is shifted right by 24 once).
 * Destination halfwords pass through the CONVERT_* macros before and
 * after the OVER macro.  Two pixels per pass, then an optional single
 * pixel.  All macros are defined in pixman-mips-dspr2-asm.h and are
 * invoked with unchanged operands.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
    li       t6, 0x00ff00ff         /* constant handed to the OVER macros */
    li       t7, 0xf800f800         /* t7..t9: constants for the 565 converters */
    li       t8, 0x07e007e0
    li       t9, 0x001F001F
    beqz     a3, 3f                 /* w == 0 */
     nop
    addiu    t1, a3, -1
    srl      a2, a2, 24             /* a2 = mask >> 24 */
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop
1:
    lhu      t2, 0(a0)              /* t2 = first dst pixel  (r5g6b5) */
    lhu      t3, 2(a0)              /* t3 = second dst pixel (r5g6b5) */
    lw       t0, 0(a1)              /* t0 = first src pixel  (a8r8g8b8) */
    lw       t1, 4(a1)              /* t1 = second src pixel (a8r8g8b8) */
    addiu    a1, a1, 8              /* src += 2 pixels */

    /* t4/t5 <- dst converted to 8888 */
    CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3
    /* results arrive in t2/t3 */
    OVER_2x8888_2x8_2x8888   t0, t1, a2, a2, t4, t5, \
                             t2, t3, t6, t0, t1, s0, s1, s2, s3
    /* t4/t5 <- results converted back to 565 */
    CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1

    sh       t4, 0(a0)
    sh       t5, 2(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 4              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lw       t0, 0(a1)              /* t0 = src pixel (a8r8g8b8) */
    lhu      t1, 0(a0)              /* t1 = dst pixel (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5
    /* result arrives in t1 */
    OVER_8888_8_8888         t0, a2, t2, t1, t6, t3, t4, t5, t7
    CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5

    sh       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
    j                 ra
     nop

END(pixman_composite_over_8888_n_0565_asm_mips)
1660
LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
/*
 * OVER of an r5g6b5 source, scaled by a constant mask, onto an r5g6b5
 * scanline.
 *
 *   a0 - dst  (r5g6b5)
 *   a1 - src  (r5g6b5)
 *   a2 - mask (32bit constant)
 *   a3 - w
 *
 * Only the top byte of the mask is used (a2 is shifted right by 24 once).
 * Source and destination halfwords both pass through
 * CONVERT_*0565_TO_*8888 before the OVER macro; the result is converted
 * back before the store.  Two pixels per pass, then an optional single
 * pixel.  All macros are defined in pixman-mips-dspr2-asm.h and are
 * invoked with unchanged operands.
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
    li       t6, 0x00ff00ff         /* constant handed to the OVER macros */
    li       t7, 0xf800f800         /* t7..t9: constants for the 565 converters */
    li       t8, 0x07e007e0
    li       t9, 0x001F001F
    beqz     a3, 3f                 /* w == 0 */
     nop
    addiu    t1, a3, -1
    srl      a2, a2, 24             /* a2 = mask >> 24 */
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop
1:
    lhu      t2, 0(a0)              /* t2 = first dst pixel  (r5g6b5) */
    lhu      t3, 2(a0)              /* t3 = second dst pixel (r5g6b5) */
    lhu      t0, 0(a1)              /* t0 = first src pixel  (r5g6b5) */
    lhu      t1, 2(a1)              /* t1 = second src pixel (r5g6b5) */
    addiu    a1, a1, 4              /* src += 2 pixels */

    /* t4/t5 <- src as 8888, s0/s1 <- dst as 8888 */
    CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3
    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5
    /* results arrive in t0/t1 */
    OVER_2x8888_2x8_2x8888   t4, t5, a2, a2, s0, s1, \
                             t0, t1, t6, s2, s3, s4, s5, t4, t5
    /* s0/s1 <- results converted back to 565 */
    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3

    sh       s0, 0(a0)
    sh       s1, 2(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 4              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lhu      t0, 0(a1)              /* t0 = src pixel (r5g6b5) */
    lhu      t1, 0(a0)              /* t1 = dst pixel (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5
    CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5
    /* result arrives in t0 */
    OVER_8888_8_8888         t2, a2, t3, t0, t6, t1, t4, t5, t7
    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5

    sh       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
    j        ra
     nop

END(pixman_composite_over_0565_n_0565_asm_mips)
1719
LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
/*
 * OVER of an a8r8g8b8 source through an a8 mask onto an a8r8g8b8
 * scanline.
 *
 *   a0 - dst  (a8r8g8b8)
 *   a1 - src  (a8r8g8b8)
 *   a2 - mask (a8)
 *   a3 - w
 *
 * Pixels are processed in pairs through OVER_2x8888_2x8_2x8888; a single
 * remaining pixel goes through OVER_8888_8_8888.  Both macros are defined
 * in pixman-mips-dspr2-asm.h and are invoked with unchanged operands.
 */

    SAVE_REGS_ON_STACK 0, s0, s1
    li       t4, 0x00ff00ff         /* constant handed to the OVER macros */
    beqz     a3, 3f                 /* w == 0 */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop
1:
    lbu      t2, 0(a2)              /* t2 = first mask byte */
    lbu      t3, 1(a2)              /* t3 = second mask byte */
    lw       t0, 0(a1)              /* t0 = first src pixel */
    lw       t1, 4(a1)              /* t1 = second src pixel */
    lw       t5, 0(a0)              /* t5 = first dst pixel */
    lw       t6, 4(a0)              /* t6 = second dst pixel */
    addiu    a2, a2, 2              /* mask += 2 */
    addiu    a1, a1, 8              /* src  += 2 pixels */

    /* results arrive in t7/t8 */
    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
                           t7, t8, t4, t9, s0, s1, t0, t1, t2

    sw       t7, 0(a0)
    sw       t8, 4(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 8              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lw       t0, 0(a1)              /* t0 = src pixel */
    lbu      t1, 0(a2)              /* t1 = mask byte */
    lw       t2, 0(a0)              /* t2 = dst pixel */

    /* result arrives in t3 */
    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8

    sw       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1
    j        ra
     nop

END(pixman_composite_over_8888_8_8888_asm_mips)
1770
LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
/*
 * OVER of an a8r8g8b8 source through an a8 mask onto an r5g6b5 scanline.
 *
 *   a0 - dst  (r5g6b5)
 *   a1 - src  (a8r8g8b8)
 *   a2 - mask (a8)
 *   a3 - w
 *
 * Destination halfwords pass through the CONVERT_* macros before and
 * after the OVER macro.  Two pixels per pass, then an optional single
 * pixel.  All macros are defined in pixman-mips-dspr2-asm.h and are
 * invoked with unchanged operands.
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
    li       t6, 0x00ff00ff         /* constant handed to the OVER macros */
    li       t7, 0xf800f800         /* t7..t9: constants for the 565 converters */
    li       t8, 0x07e007e0
    li       t9, 0x001F001F
    beqz     a3, 3f                 /* w == 0 */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop
1:
    lbu      t2, 0(a2)              /* t2 = first mask byte */
    lbu      t3, 1(a2)              /* t3 = second mask byte */
    lw       t0, 0(a1)              /* t0 = first src pixel  (a8r8g8b8) */
    lw       t1, 4(a1)              /* t1 = second src pixel (a8r8g8b8) */
    lhu      t4, 0(a0)              /* t4 = first dst pixel  (r5g6b5) */
    lhu      t5, 2(a0)              /* t5 = second dst pixel (r5g6b5) */
    addiu    a2, a2, 2              /* mask += 2 */
    addiu    a1, a1, 8              /* src  += 2 pixels */

    /* s0/s1 <- dst converted to 8888 */
    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
    /* results arrive in t4/t5 */
    OVER_2x8888_2x8_2x8888   t0, t1, t2, t3, s0, s1, \
                             t4, t5, t6, s2, s3, s4, s5, t0, t1
    /* s0/s1 <- results converted back to 565 */
    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3

    sh       s0, 0(a0)
    sh       s1, 2(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 4              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lw       t0, 0(a1)              /* t0 = src pixel (a8r8g8b8) */
    lbu      t1, 0(a2)              /* t1 = mask byte */
    lhu      t2, 0(a0)              /* t2 = dst pixel (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
    /* result arrives in t2 */
    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5

    sh       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
    j        ra
     nop

END(pixman_composite_over_8888_8_0565_asm_mips)
1828
LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
/*
 * OVER of an r5g6b5 source through an a8 mask onto an r5g6b5 scanline.
 *
 *   a0 - dst  (r5g6b5)
 *   a1 - src  (r5g6b5)
 *   a2 - mask (a8)
 *   a3 - w
 *
 * Source and destination halfwords both pass through
 * CONVERT_*0565_TO_*8888 before the OVER macro; the result is converted
 * back before the store.  Two pixels per pass, then an optional single
 * pixel.  In the single-pixel tail t4..t6 are reused as scratch, so the
 * 565 constants loaded at entry are no longer available there.  All
 * macros are defined in pixman-mips-dspr2-asm.h and are invoked with
 * unchanged operands.
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
    li       t7, 0x00ff00ff         /* constant handed to the OVER macros */
    li       t4, 0xf800f800         /* t4..t6: constants for the 565 converters */
    li       t5, 0x07e007e0
    li       t6, 0x001F001F
    beqz     a3, 3f                 /* w == 0 */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop
1:
    lbu      t2, 0(a2)              /* t2 = first mask byte */
    lbu      t3, 1(a2)              /* t3 = second mask byte */
    lhu      t0, 0(a1)              /* t0 = first src pixel  (r5g6b5) */
    lhu      t1, 2(a1)              /* t1 = second src pixel (r5g6b5) */
    lhu      t8, 0(a0)              /* t8 = first dst pixel  (r5g6b5) */
    lhu      t9, 2(a0)              /* t9 = second dst pixel (r5g6b5) */
    addiu    a2, a2, 2              /* mask += 2 */
    addiu    a1, a1, 4              /* src  += 2 pixels */

    /* s0/s1 <- src as 8888, s2/s3 <- dst as 8888 */
    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
    /* results arrive in t0/t1 */
    OVER_2x8888_2x8_2x8888   s0, s1, t2, t3, s2, s3, \
                             t0, t1, t7, s4, s5, t8, t9, s0, s1
    /* s0/s1 <- results converted back to 565 */
    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3

    sh       s0, 0(a0)
    sh       s1, 2(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 4              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lhu      t0, 0(a1)              /* t0 = src pixel (r5g6b5) */
    lbu      t1, 0(a2)              /* t1 = mask byte */
    lhu      t2, 0(a0)              /* t2 = dst pixel (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
    /* result arrives in t0 */
    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5

    sh       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
    j        ra
     nop

END(pixman_composite_over_0565_8_0565_asm_mips)
1888
LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
/*
 * OVER of an a8r8g8b8 source through an a8r8g8b8 mask onto an a8r8g8b8
 * scanline.
 *
 *   a0 - dst  (a8r8g8b8)
 *   a1 - src  (a8r8g8b8)
 *   a2 - mask (a8r8g8b8)
 *   a3 - w
 *
 * Only the top byte of each mask word is used (every mask word is
 * shifted right by 24 before it reaches the OVER macro).  Two pixels per
 * pass, then an optional single pixel.  The OVER_* macros are defined in
 * pixman-mips-dspr2-asm.h and are invoked with unchanged operands.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li       t4, 0x00ff00ff         /* constant handed to the OVER macros */
    beqz     a3, 3f                 /* w == 0 */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f                 /* w == 1: only the single-pixel tail */
     nop
1:
    lw       t2, 0(a2)              /* t2 = first mask word */
    lw       t3, 4(a2)              /* t3 = second mask word */
    lw       t0, 0(a1)              /* t0 = first src pixel */
    lw       t1, 4(a1)              /* t1 = second src pixel */
    lw       t5, 0(a0)              /* t5 = first dst pixel */
    lw       t6, 4(a0)              /* t6 = second dst pixel */
    addiu    a2, a2, 8              /* mask += 2 pixels */
    addiu    a1, a1, 8              /* src  += 2 pixels */
    srl      t2, t2, 24             /* t2 = top byte of first mask word */
    srl      t3, t3, 24             /* t3 = top byte of second mask word */

    /* results arrive in t7/t8 */
    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1

    sw       t7, 0(a0)
    sw       t8, 4(a0)
    addiu    a3, a3, -2             /* w -= 2 */
    addiu    t1, a3, -1
    bgtz     t1, 1b                 /* at least two pixels left */
     addiu   a0, a0, 8              /* (delay slot) dst += 2 pixels */
2:
    beqz     a3, 3f                 /* no odd pixel left */
     nop
    lw       t1, 0(a2)              /* t1 = mask word */
    lw       t0, 0(a1)              /* t0 = src pixel */
    lw       t2, 0(a0)              /* t2 = dst pixel */
    srl      t1, t1, 24             /* t1 = top byte of the mask word */

    /* result arrives in t3 */
    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8

    sw       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
    j        ra
     nop

END(pixman_composite_over_8888_8888_8888_asm_mips)
1941
LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
/*
 * OVER of an a8r8g8b8 source onto an a8r8g8b8 scanline (no mask).
 *
 *   a0 - dst  (a8r8g8b8)
 *   a1 - src  (a8r8g8b8)
 *   a2 - w
 *
 * Two pixels per pass in the loop at 1:, then an optional single pixel
 * at 2:.  Shortcuts:
 *   - (~src >> 24) == 0, i.e. top byte 0xff : src is stored unchanged;
 *   - src == 0                              : dst is left untouched.
 * In the pair loop a shortcut is taken only when it holds for both
 * pixels.  Otherwise dst is scaled by (~src >> 24) through the multiply
 * macro and src is added with per-byte saturation.
 *
 * Every branch carries an explicit delay-slot instruction.  The two
 * branches directly in front of a macro invocation used to rely on the
 * macro's first expanded instruction landing in the slot; they now have
 * their own nop, so this routine does not depend on the instruction
 * order inside the macros from pixman-mips-dspr2-asm.h.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li           t4, 0x00ff00ff       /* constant handed to the multiply macros */
    beqz         a2, 3f               /* w == 0 */
     nop
    addiu        t1, a2, -1
    beqz         t1, 2f               /* w == 1: only the single-pixel tail */
     nop
1:
    lw           t0, 0(a1)            /* t0 = first src pixel  (a8r8g8b8) */
    lw           t1, 4(a1)            /* t1 = second src pixel (a8r8g8b8) */
    lw           t2, 0(a0)            /* t2 = first dst pixel  (a8r8g8b8) */
    lw           t3, 4(a0)            /* t3 = second dst pixel (a8r8g8b8) */
    addiu        a1, a1, 8            /* src += 2 pixels */

    not          t5, t0
    srl          t5, t5, 24           /* t5 = (~src0) >> 24 */
    not          t6, t1
    srl          t6, t6, 24           /* t6 = (~src1) >> 24 */

    or           t7, t5, t6
    beqz         t7, 11f              /* both top bytes 0xff: store src as is */
     or          t8, t0, t1           /* (delay slot) zero only if both src are 0 */
    beqz         t8, 12f              /* both src pixels zero: dst unchanged */
     nop                              /* explicit delay slot, see header */

    /* t7/t8 <- dst x t5/t6 */
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3

    addu_s.qb    t0, t7, t0           /* saturating per-byte add */
    addu_s.qb    t1, t8, t1
11:
    sw           t0, 0(a0)
    sw           t1, 4(a0)
12:
    addiu        a2, a2, -2           /* w -= 2 */
    addiu        t1, a2, -1
    bgtz         t1, 1b               /* at least two pixels left */
     addiu       a0, a0, 8            /* (delay slot) dst += 2 pixels */
2:
    beqz         a2, 3f               /* no odd pixel left */
     nop

    lw           t0, 0(a1)            /* t0 = src pixel (a8r8g8b8) */
    lw           t1, 0(a0)            /* t1 = dst pixel (a8r8g8b8) */
    addiu        a1, a1, 4            /* src += 1 pixel */

    not          t2, t0
    srl          t2, t2, 24           /* t2 = (~src) >> 24 */

    beqz         t2, 21f              /* top byte 0xff: store src as is */
     nop
    beqz         t0, 3f               /* src == 0: dst unchanged */
     nop                              /* explicit delay slot, see header */

    /* t3 <- dst x t2 */
    MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7

    addu_s.qb    t0, t3, t0           /* saturating per-byte add */
21:
    sw           t0, 0(a0)

3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
    j            ra
     nop

END(pixman_composite_over_8888_8888_asm_mips)
2012
LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
/*
 * Composite an a8r8g8b8 source scanline OVER an r5g6b5 destination.
 *
 * a0 - dst  (r5g6b5), updated in place
 * a1 - src  (a8r8g8b8)
 * a2 - w    (number of pixels)
 *
 * Two pixels are processed per pass of the loop at label 1; label 2
 * handles the leftover pixel of an odd-width scanline.
 */

    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
    /* r5g6b5 field masks, replicated in both halfwords, plus the byte mask. */
    li           s5, 0x001F001F
    li           s4, 0x07e007e0
    li           s3, 0xf800f800
    li           t4, 0x00ff00ff
    beqz         a2, 3f             /* w == 0: nothing to do */
     nop
    addiu        t1, a2, -1
    beqz         t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lhu          t2, 0(a0)          /* t2 = first destination pixel  (r5g6b5) */
    lhu          t3, 2(a0)          /* t3 = second destination pixel (r5g6b5) */
    lw           t0, 0(a1)          /* t0 = first source pixel  (a8r8g8b8) */
    lw           t1, 4(a1)          /* t1 = second source pixel (a8r8g8b8) */

    not          t5, t0
    not          t6, t1
    srl          t5, t5, 24         /* t5 = 255 - alpha of first source pixel */
    srl          t6, t6, 24         /* t6 = 255 - alpha of second source pixel */

    or           t7, t5, t6
    beqz         t7, 11f            /* both sources opaque: just convert and store */
     or          t8, t0, t1         /* (delay slot) zero only when both sources are 0 */
    beqz         t8, 12f            /* both sources are 0: destination unchanged */
    /*
     * NOTE(review): the branch above has no explicit delay-slot instruction;
     * with reordering disabled the first instruction of the macro expansion
     * below would run in the slot - confirm it is harmless. Keep the macro
     * immediately after the branch.
     */
    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2
    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1

    addu_s.qb    t0, t7, t0         /* per-byte saturating add of the source */
    addu_s.qb    t1, t8, t1
11:
    CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3
    sh           t7, 0(a0)
    sh           t8, 2(a0)
12:
    addiu        a1, a1, 8          /* source pointer: two pixels consumed */
    addiu        a2, a2, -2
    addiu        t1, a2, -1
    bgtz         t1, 1b             /* repeat while at least two pixels remain */
     addiu       a0, a0, 4          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz         a2, 3f             /* no leftover pixel */
     nop

    lhu          t1, 0(a0)          /* t1 = destination pixel (r5g6b5) */
    lw           t0, 0(a1)          /* t0 = source pixel      (a8r8g8b8) */

    not          t2, t0
    srl          t2, t2, 24         /* t2 = 255 - source alpha */

    beqz         t2, 21f            /* opaque source: convert and store */
     addiu       a1, a1, 4          /* (delay slot) step past the consumed source pixel */
    beqz         t0, 3f             /* source is 0: destination unchanged */
    /* The macro must stay directly after the branch (see the note in the loop above). */
    CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9
    MIPS_UN8x4_MUL_UN8       s0, t2, t3, t4, t5, t6, t7

    addu_s.qb    t0, t3, t0         /* per-byte saturating add of the source */
21:
    CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9
    sh           s0, 0(a0)

3:
    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
    j            ra
     nop

END(pixman_composite_over_8888_0565_asm_mips)
2090
LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
/*
 * Composite a constant colour OVER an r5g6b5 destination scanline.
 *
 * a0 - dst  (r5g6b5), updated in place
 * a1 - src  (32bit constant)
 * a2 - w    (number of pixels)
 *
 * A fully opaque constant is converted once and replicated (label 0).
 * Otherwise the destination is blended two pixels at a time (label 2),
 * with a single-pixel tail (label 3).
 */

    beqz         a2, 5f             /* w == 0: return without touching the stack */
     nop

    not          t0, a1
    srl          t0, t0, 24         /* t0 = 255 - alpha of the constant, in 0..255 */
    bnez         t0, 1f             /* not fully opaque: take the blending path */
     nop

    /* Opaque constant: every destination pixel becomes the converted colour. */
    CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3
0:
    addiu        a2, a2, -1
    sh           t1, 0(a0)
    bgtz         a2, 0b
     addiu       a0, a0, 2          /* (delay slot) next destination pixel */
    j            ra
     nop

1:
    SAVE_REGS_ON_STACK 0, s0, s1, s2
    /* r5g6b5 field masks, replicated in both halfwords, plus the byte mask. */
    li           t7, 0x001F001F
    li           t6, 0x07e007e0
    li           t5, 0xf800f800
    li           t4, 0x00ff00ff
    addiu        t1, a2, -1
    beqz         t1, 3f             /* w == 1: single-pixel path only */
     nop
2:
    lhu          t2, 2(a0)          /* t2 = second destination pixel (r5g6b5) */
    lhu          t1, 0(a0)          /* t1 = first destination pixel  (r5g6b5) */

    CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2
    MIPS_2xUN8x4_MUL_2xUN8   t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8
    addu_s.qb                t2, t2, a1   /* per-byte saturating add of the constant */
    addu_s.qb                t1, t1, a1
    CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1

    addiu        a2, a2, -2
    sh           t8, 2(a0)
    sh           t3, 0(a0)

    addiu        t1, a2, -1
    bgtz         t1, 2b             /* repeat while at least two pixels remain */
     addiu       a0, a0, 4          /* (delay slot) destination pointer += 2 pixels */
3:
    beqz         a2, 4f             /* no leftover pixel */
     nop

    lhu          t1, 0(a0)          /* t1 = destination pixel (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1
    MIPS_UN8x4_MUL_UN8       t2, t0, t1, t4, s0, s1, s2
    addu_s.qb                t1, t1, a1   /* per-byte saturating add of the constant */
    CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1

    sh           t2, 0(a0)

4:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
5:
    j            ra
     nop

END(pixman_composite_over_n_0565_asm_mips)
2160
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
/*
 * Composite a constant colour OVER an a8r8g8b8 destination scanline.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (32bit constant)
 * a2 - w    (number of pixels)
 *
 * A fully opaque constant is simply stored w times (label 0). Otherwise
 * the destination is blended two pixels per pass (label 2), with a
 * single-pixel tail (label 3).
 */

    beqz         a2, 5f             /* w == 0: return without touching the stack */
     nop

    not          t0, a1
    srl          t0, t0, 24         /* t0 = 255 - alpha of the constant, in 0..255 */
    bnez         t0, 1f             /* not fully opaque: take the blending path */
     nop
0:
    addiu        a2, a2, -1
    sw           a1, 0(a0)          /* opaque constant replaces the destination */
    bgtz         a2, 0b
     addiu       a0, a0, 4          /* (delay slot) next destination pixel */
    j            ra
     nop

1:
    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li           t4, 0x00ff00ff     /* mask operand handed to the multiply macros */
    addiu        t1, a2, -1
    beqz         t1, 3f             /* w == 1: single-pixel path only */
     nop
2:
    lw           t3, 4(a0)          /* t3 = second destination pixel */
    lw           t2, 0(a0)          /* t2 = first destination pixel */

    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3

    addu_s.qb    t8, t8, a1         /* per-byte saturating add of the constant */
    addu_s.qb    t7, t7, a1

    addiu        a2, a2, -2
    sw           t8, 4(a0)
    sw           t7, 0(a0)

    addiu        t1, a2, -1
    bgtz         t1, 2b             /* repeat while at least two pixels remain */
     addiu       a0, a0, 8          /* (delay slot) destination pointer += 2 pixels */
3:
    beqz         a2, 4f             /* no leftover pixel */
     nop

    lw           t1, 0(a0)          /* t1 = destination pixel */

    MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7

    addu_s.qb    t3, t3, a1         /* per-byte saturating add of the constant */

    sw           t3, 0(a0)

4:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
5:
    j            ra
     nop

END(pixman_composite_over_n_8888_asm_mips)
2224
LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
/*
 * dst = saturate(dst + src * mask) on 8-bit scanlines, where the product
 * is renormalised to 8 bits.
 *
 * a0 - dst  (a8), updated in place
 * a1 - src  (a8)
 * a2 - mask (a8)
 * a3 - w    (number of pixels)
 *
 * Whole groups of four pixels are handled by the packed loop at label 0;
 * the remaining 0..3 pixels go through the byte loop at label 2.
 */

    SAVE_REGS_ON_STACK 0, v0, v1
    li                t9, 0x00ff00ff  /* keeps the low byte of each halfword */
    beqz              a3, 3f
     nop

    srl               v0, a3, 2   /* v0 = number of whole 4-pixel groups */
    beqz              v0, 1f      /* fewer than 4 pixels: byte loop only */
     nop

    /*
     * Bottom-tested loop: v0 >= 1 is guaranteed on entry by the check above,
     * so the counter is tested once per iteration, at the end.
     */
0:
    lbu               t0, 0(a2)   /* t0..t3 = four mask bytes */
    lbu               t1, 1(a2)
    lbu               t2, 2(a2)
    lbu               t3, 3(a2)
    lbu               t4, 0(a0)   /* t4..t7 = four destination bytes */
    lbu               t5, 1(a0)
    lbu               t6, 2(a0)
    lbu               t7, 3(a0)

    addiu             a2, a2, 4

    precr_sra.ph.w    t1, t0, 0   /* t1 = t1:t0 as two halfwords */
    precr_sra.ph.w    t3, t2, 0
    precr_sra.ph.w    t5, t4, 0
    precr_sra.ph.w    t7, t6, 0

    precr.qb.ph       t0, t3, t1  /* t0 = four mask bytes, pixel 0 in the low byte */
    precr.qb.ph       t1, t7, t5  /* t1 = four destination bytes, same layout */

    lbu               t4, 0(a1)   /* t4, v1, t7, t8 = four source bytes */
    lbu               v1, 1(a1)
    lbu               t7, 2(a1)
    lbu               t8, 3(a1)

    addiu             a1, a1, 4

    precr_sra.ph.w    v1, t4, 0   /* v1 = source pixels 1:0 as halfwords */
    precr_sra.ph.w    t8, t7, 0   /* t8 = source pixels 3:2 as halfwords */

    muleu_s.ph.qbl    t2, t0, t8  /* mask[3:2] * src[3:2] */
    muleu_s.ph.qbr    t3, t0, v1  /* mask[1:0] * src[1:0] */
    /* Renormalise each 16-bit product to 8 bits. */
    shra_r.ph         t4, t2, 8
    shra_r.ph         t5, t3, 8
    and               t4, t4, t9
    and               t5, t5, t9
    addq.ph           t2, t2, t4
    addq.ph           t3, t3, t5
    shra_r.ph         t2, t2, 8
    shra_r.ph         t3, t3, 8
    precr.qb.ph       t0, t2, t3  /* repack the four products into bytes */

    addu_s.qb         t2, t0, t1  /* per-byte saturating add with the destination */

    sb                t2, 0(a0)
    srl               t2, t2, 8
    sb                t2, 1(a0)
    srl               t2, t2, 8
    sb                t2, 2(a0)
    srl               t2, t2, 8
    sb                t2, 3(a0)
    addiu             v0, v0, -1
    addiu             a3, a3, -4
    bnez              v0, 0b      /* more whole groups left */
     addiu            a0, a0, 4   /* (delay slot) destination pointer += 4 pixels */

1:
    beqz              a3, 3f      /* w was a multiple of 4 */
     nop
2:
    lbu               t8, 0(a1)   /* t8 = source byte */
    lbu               t0, 0(a2)   /* t0 = mask byte */
    lbu               t1, 0(a0)   /* t1 = destination byte */
    addiu             a1, a1, 1
    addiu             a2, a2, 1

    mul               t2, t0, t8
    /* Renormalise the 16-bit product to 8 bits. */
    shra_r.ph         t3, t2, 8
    andi              t3, t3, 0xff
    addq.ph           t2, t2, t3
    shra_r.ph         t2, t2, 8
    andi              t2, t2, 0xff

    addu_s.qb         t2, t2, t1  /* saturating add with the destination */
    sb                t2, 0(a0)
    addiu             a3, a3, -1
    bnez              a3, 2b
     addiu            a0, a0, 1

3:
    RESTORE_REGS_FROM_STACK 0, v0, v1
    j                 ra
     nop

END(pixman_composite_add_8_8_8_asm_mips)
2328
LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
/*
 * dst = saturate(dst + alpha(src) * mask) on 8-bit scanlines, where
 * alpha(src) is the top byte of the constant and the product is
 * renormalised to 8 bits.
 *
 * a0 - dst  (a8), updated in place
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w    (number of pixels)
 *
 * Whole groups of four pixels are handled by the packed loop at label 0;
 * the remaining 0..3 pixels go through the byte loop at label 2.
 */

    SAVE_REGS_ON_STACK 0, v0
    li                t9, 0x00ff00ff  /* keeps the low byte of each halfword */
    beqz              a3, 3f
     nop

    srl               v0, a3, 2   /* v0 = number of whole 4-pixel groups */
    beqz              v0, 1f      /* fewer than 4 pixels: byte loop only */
     nop

    srl               t8, a1, 24
    replv.ph          t8, t8      /* t8 = source alpha in both halfwords */

    /*
     * Bottom-tested loop: v0 >= 1 is guaranteed on entry by the check above,
     * so the counter is tested once per iteration, at the end.
     */
0:
    lbu               t0, 0(a2)   /* t0..t3 = four mask bytes */
    lbu               t1, 1(a2)
    lbu               t2, 2(a2)
    lbu               t3, 3(a2)
    lbu               t4, 0(a0)   /* t4..t7 = four destination bytes */
    lbu               t5, 1(a0)
    lbu               t6, 2(a0)
    lbu               t7, 3(a0)

    addiu             a2, a2, 4

    precr_sra.ph.w    t1, t0, 0   /* t1 = t1:t0 as two halfwords */
    precr_sra.ph.w    t3, t2, 0
    precr_sra.ph.w    t5, t4, 0
    precr_sra.ph.w    t7, t6, 0

    precr.qb.ph       t0, t3, t1  /* t0 = four mask bytes, pixel 0 in the low byte */
    precr.qb.ph       t1, t7, t5  /* t1 = four destination bytes, same layout */

    muleu_s.ph.qbl    t2, t0, t8  /* mask[3:2] * source alpha */
    muleu_s.ph.qbr    t3, t0, t8  /* mask[1:0] * source alpha */
    /* Renormalise each 16-bit product to 8 bits. */
    shra_r.ph         t4, t2, 8
    shra_r.ph         t5, t3, 8
    and               t4, t4, t9
    and               t5, t5, t9
    addq.ph           t2, t2, t4
    addq.ph           t3, t3, t5
    shra_r.ph         t2, t2, 8
    shra_r.ph         t3, t3, 8
    precr.qb.ph       t0, t2, t3  /* repack the four products into bytes */

    addu_s.qb         t2, t0, t1  /* per-byte saturating add with the destination */

    sb                t2, 0(a0)
    srl               t2, t2, 8
    sb                t2, 1(a0)
    srl               t2, t2, 8
    sb                t2, 2(a0)
    srl               t2, t2, 8
    sb                t2, 3(a0)
    addiu             v0, v0, -1
    addiu             a3, a3, -4
    bnez              v0, 0b      /* more whole groups left */
     addiu            a0, a0, 4   /* (delay slot) destination pointer += 4 pixels */

1:
    beqz              a3, 3f      /* w was a multiple of 4 */
     nop
    srl               t8, a1, 24  /* t8 = source alpha (plain, not replicated) */
2:
    lbu               t0, 0(a2)   /* t0 = mask byte */
    lbu               t1, 0(a0)   /* t1 = destination byte */
    addiu             a2, a2, 1

    mul               t2, t0, t8
    /* Renormalise the 16-bit product to 8 bits. */
    shra_r.ph         t3, t2, 8
    andi              t3, t3, 0xff
    addq.ph           t2, t2, t3
    shra_r.ph         t2, t2, 8
    andi              t2, t2, 0xff

    addu_s.qb         t2, t2, t1  /* saturating add with the destination */
    sb                t2, 0(a0)
    addiu             a3, a3, -1
    bnez              a3, 2b
     addiu            a0, a0, 1

3:
    RESTORE_REGS_FROM_STACK 0, v0
    j                 ra
     nop

END(pixman_composite_add_n_8_8_asm_mips)
2424
LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
/*
 * ADD a constant colour, weighted by an a8 mask, onto an a8r8g8b8
 * destination scanline.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w    (number of pixels)
 *
 * Two pixels per pass at label 1; leftover pixel at label 2. The actual
 * arithmetic lives in the MIPS_*_MUL_*_ADD_* macros defined elsewhere.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li       t4, 0x00ff00ff     /* mask operand handed to the macros */
    beqz     a3, 3f             /* w == 0: nothing to do */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lw       t2, 0(a0)          /* t2 = first destination pixel  (a8r8g8b8) */
    lw       t3, 4(a0)          /* t3 = second destination pixel (a8r8g8b8) */
    lbu      t0, 0(a2)          /* t0 = first mask byte  (a8) */
    lbu      t1, 1(a2)          /* t1 = second mask byte (a8) */

    /* Operands: src, src, mask0, mask1, dst0, dst1, out0, out1, then scratch. */
    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, t0, t1, t2, t3, t5, t6, t4, t7, t8, t9, s0, s1, s2

    addiu    a2, a2, 2          /* mask pointer: two bytes consumed */
    sw       t5, 0(a0)
    sw       t6, 4(a0)
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 8          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a3, 3f             /* no leftover pixel */
     nop
    lw       t1, 0(a0)          /* t1 = destination pixel (a8r8g8b8) */
    lbu      t0, 0(a2)          /* t0 = mask byte         (a8) */

    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6

    sw       t2, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
    j        ra
     nop

END(pixman_composite_add_n_8_8888_asm_mips)
2476
LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
/*
 * ADD an r5g6b5 source, weighted by an a8 mask, onto an r5g6b5
 * destination scanline.
 *
 * a0 - dst  (r5g6b5), updated in place
 * a1 - src  (r5g6b5)
 * a2 - mask (a8)
 * a3 - w    (number of pixels)
 *
 * Source and destination are widened with the CONVERT_*0565_TO_*8888
 * macros, combined, and narrowed again. Two pixels per pass at label 1;
 * leftover pixel at label 2.
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
    /* r5g6b5 field masks, replicated in both halfwords, plus the byte mask. */
    li       t7, 0x00ff00ff
    li       t6, 0x001F001F
    li       t5, 0x07e007e0
    li       t4, 0xf800f800
    beqz     a3, 3f             /* w == 0: nothing to do */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lhu      t8, 0(a0)          /* t8 = first destination pixel  (r5g6b5) */
    lhu      t9, 2(a0)          /* t9 = second destination pixel (r5g6b5) */
    lbu      t2, 0(a2)          /* t2 = first mask byte  (a8) */
    lbu      t3, 1(a2)          /* t3 = second mask byte (a8) */
    lhu      t0, 0(a1)          /* t0 = first source pixel  (r5g6b5) */
    lhu      t1, 2(a1)          /* t1 = second source pixel (r5g6b5) */

    CONVERT_2x0565_TO_2x8888  t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
    CONVERT_2x0565_TO_2x8888  t8, t9, s2, s3, t5, t6, s4, s5, s6, s7
    /* Operands: src0, src1, mask0, mask1, dst0, dst1, out0, out1, then scratch. */
    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4  s0, s1, t2, t3, s2, s3, t0, t1, t7, s4, s5, s6, s7, t8, t9
    CONVERT_2x8888_TO_2x0565  t0, t1, s0, s1, t4, t5, t6, s2, s3

    addiu    a1, a1, 4          /* source pointer: two pixels consumed */
    addiu    a2, a2, 2          /* mask pointer: two bytes consumed */
    sh       s0, 0(a0)
    sh       s1, 2(a0)
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 4          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a3, 3f             /* no leftover pixel */
     nop
    lhu      t2, 0(a0)          /* t2 = destination pixel (r5g6b5) */
    lbu      t1, 0(a2)          /* t1 = mask byte         (a8) */
    lhu      t0, 0(a1)          /* t0 = source pixel      (r5g6b5) */

    /* From here on t4..t6 are reused as scratch; the constants are no longer needed. */
    CONVERT_1x0565_TO_1x8888  t0, t3, t4, t5
    CONVERT_1x0565_TO_1x8888  t2, t4, t5, t6
    MIPS_UN8x4_MUL_UN8_ADD_UN8x4  t3, t1, t4, t0, t7, t2, t5, t6
    CONVERT_1x8888_TO_1x0565  t0, t3, t4, t5

    sh       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
    j        ra
     nop

END(pixman_composite_add_0565_8_0565_asm_mips)
2539
LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
/*
 * ADD an a8r8g8b8 source, weighted by an a8 mask, onto an a8r8g8b8
 * destination scanline.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (a8r8g8b8)
 * a2 - mask (a8)
 * a3 - w    (number of pixels)
 *
 * Two pixels per pass at label 1; leftover pixel at label 2.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li       t4, 0x00ff00ff     /* mask operand handed to the macros */
    beqz     a3, 3f             /* w == 0: nothing to do */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lw       t5, 0(a0)          /* t5 = first destination pixel  (a8r8g8b8) */
    lw       t6, 4(a0)          /* t6 = second destination pixel (a8r8g8b8) */
    lbu      t2, 0(a2)          /* t2 = first mask byte  (a8) */
    lbu      t3, 1(a2)          /* t3 = second mask byte (a8) */
    lw       t0, 0(a1)          /* t0 = first source pixel  (a8r8g8b8) */
    lw       t1, 4(a1)          /* t1 = second source pixel (a8r8g8b8) */

    /* Operands: src0, src1, mask0, mask1, dst0, dst1, out0, out1, then scratch. */
    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1

    addiu    a1, a1, 8          /* source pointer: two pixels consumed */
    addiu    a2, a2, 2          /* mask pointer: two bytes consumed */
    sw       t7, 0(a0)
    sw       t8, 4(a0)
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 8          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a3, 3f             /* no leftover pixel */
     nop
    lw       t2, 0(a0)          /* t2 = destination pixel (a8r8g8b8) */
    lbu      t1, 0(a2)          /* t1 = mask byte         (a8) */
    lw       t0, 0(a1)          /* t0 = source pixel      (a8r8g8b8) */

    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7

    sw       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
    j        ra
     nop

END(pixman_composite_add_8888_8_8888_asm_mips)
2593
LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
/*
 * ADD an a8r8g8b8 source, weighted by the alpha of a constant mask, onto
 * an a8r8g8b8 destination scanline.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (a8r8g8b8)
 * a2 - mask (32bit constant); reduced to its top byte before use
 * a3 - w    (number of pixels)
 *
 * Two pixels per pass at label 1; leftover pixel at label 2.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li       t4, 0x00ff00ff     /* mask operand handed to the macros */
    beqz     a3, 3f             /* w == 0: nothing to do */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     srl     a2, a2, 24         /* (delay slot) a2 = mask alpha, needed on both paths */
1:
    lw       t2, 0(a0)          /* t2 = first destination pixel  (a8r8g8b8) */
    lw       t3, 4(a0)          /* t3 = second destination pixel (a8r8g8b8) */
    lw       t0, 0(a1)          /* t0 = first source pixel  (a8r8g8b8) */
    lw       t1, 4(a1)          /* t1 = second source pixel (a8r8g8b8) */

    /* Operands: src0, src1, mask, mask, dst0, dst1, out0, out1, then scratch. */
    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, a2, a2, t2, t3, t5, t6, t4, t7, t8, t9, s0, s1, s2

    addiu    a1, a1, 8          /* source pointer: two pixels consumed */
    sw       t5, 0(a0)
    sw       t6, 4(a0)
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 8          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a3, 3f             /* no leftover pixel */
     nop
    lw       t1, 0(a0)          /* t1 = destination pixel (a8r8g8b8) */
    lw       t0, 0(a1)          /* t0 = source pixel      (a8r8g8b8) */

    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7

    sw       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
    j        ra
     nop

END(pixman_composite_add_8888_n_8888_asm_mips)
2646
LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
/*
 * ADD an a8r8g8b8 source, weighted by the alpha of an a8r8g8b8 mask,
 * onto an a8r8g8b8 destination scanline.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (a8r8g8b8)
 * a2 - mask (a8r8g8b8); only the top byte of each mask pixel is used
 * a3 - w    (number of pixels)
 *
 * Two pixels per pass at label 1; leftover pixel at label 2.
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2
    li       t4, 0x00ff00ff     /* mask operand handed to the macros */
    beqz     a3, 3f             /* w == 0: nothing to do */
     nop
    addiu    t1, a3, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lw       t2, 0(a2)          /* t2 = first mask pixel  (a8r8g8b8) */
    lw       t3, 4(a2)          /* t3 = second mask pixel (a8r8g8b8) */
    lw       t5, 0(a0)          /* t5 = first destination pixel  (a8r8g8b8) */
    lw       t6, 4(a0)          /* t6 = second destination pixel (a8r8g8b8) */
    lw       t0, 0(a1)          /* t0 = first source pixel  (a8r8g8b8) */
    lw       t1, 4(a1)          /* t1 = second source pixel (a8r8g8b8) */
    srl      t2, t2, 24         /* keep only the mask alpha */
    srl      t3, t3, 24

    /* Operands: src0, src1, mask0, mask1, dst0, dst1, out0, out1, then scratch. */
    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1

    addiu    a1, a1, 8          /* source pointer: two pixels consumed */
    addiu    a2, a2, 8          /* mask pointer: two pixels consumed */
    sw       t7, 0(a0)
    sw       t8, 4(a0)
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 8          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a3, 3f             /* no leftover pixel */
     nop
    lw       t1, 0(a2)          /* t1 = mask pixel        (a8r8g8b8) */
    lw       t2, 0(a0)          /* t2 = destination pixel (a8r8g8b8) */
    lw       t0, 0(a1)          /* t0 = source pixel      (a8r8g8b8) */
    srl      t1, t1, 24         /* keep only the mask alpha */

    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7

    sw       t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
    j        ra
     nop

END(pixman_composite_add_8888_8888_8888_asm_mips)
2703
LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
/*
 * dst = saturate(dst + src) on 8-bit scanlines.
 *
 * a0 - dst  (a8), updated in place
 * a1 - src  (a8)
 * a2 - w    (number of pixels)
 *
 * Whole groups of four pixels are handled by the packed loop at label 0;
 * the remaining 0..3 pixels go through the byte loop at label 2.
 */

    beqz              a2, 3f
     nop
    srl               t9, a2, 2   /* t9 = number of whole 4-pixel groups */
    beqz              t9, 1f      /* fewer than 4 pixels: byte loop only */
     nop

    /*
     * Bottom-tested loop: t9 >= 1 is guaranteed on entry by the check above,
     * so the counter is tested once per iteration, at the end.
     */
0:
    lbu               t0, 0(a1)   /* t0..t3 = four source bytes */
    lbu               t1, 1(a1)
    lbu               t2, 2(a1)
    lbu               t3, 3(a1)
    lbu               t4, 0(a0)   /* t4..t7 = four destination bytes */
    lbu               t5, 1(a0)
    lbu               t6, 2(a0)
    lbu               t7, 3(a0)

    addiu             a1, a1, 4

    precr_sra.ph.w    t1, t0, 0   /* t1 = t1:t0 as two halfwords */
    precr_sra.ph.w    t3, t2, 0
    precr_sra.ph.w    t5, t4, 0
    precr_sra.ph.w    t7, t6, 0

    precr.qb.ph       t0, t3, t1  /* t0 = four source bytes, pixel 0 in the low byte */
    precr.qb.ph       t1, t7, t5  /* t1 = four destination bytes, same layout */

    addu_s.qb         t2, t0, t1  /* per-byte saturating add */

    sb                t2, 0(a0)
    srl               t2, t2, 8
    sb                t2, 1(a0)
    srl               t2, t2, 8
    sb                t2, 2(a0)
    srl               t2, t2, 8
    sb                t2, 3(a0)
    addiu             t9, t9, -1
    addiu             a2, a2, -4
    bnez              t9, 0b      /* more whole groups left */
     addiu            a0, a0, 4   /* (delay slot) destination pointer += 4 pixels */

1:
    beqz              a2, 3f      /* w was a multiple of 4 */
     nop
2:
    lbu               t0, 0(a1)   /* t0 = source byte */
    lbu               t1, 0(a0)   /* t1 = destination byte */
    addiu             a1, a1, 1

    addu_s.qb         t2, t0, t1  /* saturating add */
    sb                t2, 0(a0)
    addiu             a2, a2, -1
    bnez              a2, 2b
     addiu            a0, a0, 1

3:
    j                 ra
     nop

END(pixman_composite_add_8_8_asm_mips)
2771
LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
/*
 * dst = saturate(dst + src), per byte, on a8r8g8b8 scanlines.
 *
 * a0 - dst (a8r8g8b8), updated in place
 * a1 - src (a8r8g8b8)
 * a2 - w   (number of pixels)
 *
 * Whole groups of four pixels are handled by the loop at label 1; the
 * remaining 0..3 pixels go through the single-pixel loop at label 3.
 */

    beqz         a2, 4f
     nop

    srl          t9, a2, 2      /* t9 = number of whole 4-pixel groups */
    beqz         t9, 3f         /* fewer than 4 pixels: single-pixel loop only */
     nop

    /*
     * Bottom-tested loop: t9 >= 1 is guaranteed on entry by the check above.
     * One copy of the 4-pixel body serves every group, including the last.
     */
1:
    lw           t0, 0(a1)      /* t0..t3 = four source pixels */
    lw           t1, 4(a1)
    lw           t2, 8(a1)
    lw           t3, 12(a1)
    lw           t4, 0(a0)      /* t4..t7 = four destination pixels */
    lw           t5, 4(a0)
    lw           t6, 8(a0)
    lw           t7, 12(a0)
    addiu        a1, a1, 16
    addiu        t9, t9, -1
    addiu        a2, a2, -4

    addu_s.qb    t4, t4, t0     /* per-byte saturating add */
    addu_s.qb    t5, t5, t1
    addu_s.qb    t6, t6, t2
    addu_s.qb    t7, t7, t3

    sw           t4, 0(a0)
    sw           t5, 4(a0)
    sw           t6, 8(a0)
    sw           t7, 12(a0)
    bnez         t9, 1b         /* more whole groups left */
     addiu       a0, a0, 16     /* (delay slot) destination pointer += 4 pixels */

    beqz         a2, 4f         /* w was a multiple of 4 */
     nop
3:
    lw           t0, 0(a1)      /* t0 = source pixel */
    lw           t1, 0(a0)      /* t1 = destination pixel */
    addiu        a1, a1, 4
    addiu        a2, a2, -1
    addu_s.qb    t1, t1, t0     /* per-byte saturating add */
    sw           t1, 0(a0)
    bnez         a2, 3b
     addiu       a0, a0, 4
4:
    jr           ra
     nop

END(pixman_composite_add_8888_8888_asm_mips)
2848
LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
/*
 * Scale each r5g6b5 destination pixel by (255 - src), where src is an
 * 8-bit alpha value.
 *
 * a0 - dst  (r5g6b5), updated in place
 * a1 - src  (a8)
 * a2 - w    (number of pixels)
 *
 * Two pixels per pass at label 1; leftover pixel at label 2.
 */

    beqz     a2, 4f             /* w == 0: return without touching the stack */
     nop

    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
    /* r5g6b5 field masks, replicated in both halfwords, plus the byte mask. */
    li       t5, 0x00ff00ff
    li       t4, 0x001F001F
    li       t3, 0x07e007e0
    li       t2, 0xf800f800

    addiu    t1, a2, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lhu      t6, 0(a0)          /* t6 = first destination pixel  (r5g6b5) */
    lhu      t7, 2(a0)          /* t7 = second destination pixel (r5g6b5) */
    lbu      t0, 0(a1)          /* t0 = first source byte  (a8) */
    lbu      t1, 1(a1)          /* t1 = second source byte (a8) */

    /* lbu zero-extends, so flipping the low 8 bits yields 255 - src. */
    xori     t0, t0, 0xff       /* t0 = neg source1 */
    xori     t1, t1, 0xff       /* t1 = neg source2 */
    CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3
    MIPS_2xUN8x4_MUL_2xUN8   t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9
    CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1

    addiu    a1, a1, 2          /* source pointer: two bytes consumed */
    sh       t8, 0(a0)
    sh       t9, 2(a0)
    addiu    a2, a2, -2
    addiu    t1, a2, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 4          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a2, 3f             /* no leftover pixel */
     nop
    lhu      t1, 0(a0)          /* t1 = destination pixel (r5g6b5) */
    lbu      t0, 0(a1)          /* t0 = source byte       (a8) */

    xori     t0, t0, 0xff       /* t0 = neg source */
    /* t2..t4 are reused as scratch below; the field masks are no longer needed. */
    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4
    MIPS_UN8x4_MUL_UN8        t2, t0, t1, t5, t3, t4, t6
    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4

    sh       t2, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
4:
    j        ra
     nop

END(pixman_composite_out_reverse_8_0565_asm_mips)
2909
LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
/*
 * Scale each a8r8g8b8 destination pixel by (255 - src), where src is an
 * 8-bit alpha value.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (a8)
 * a2 - w    (number of pixels)
 *
 * Only caller-saved temporaries are used, so nothing is spilled to the
 * stack. Two pixels per pass at label 1; leftover pixel at label 2.
 */

    beqz     a2, 3f             /* w == 0: nothing to do */
     nop
    li       t4, 0x00ff00ff     /* mask operand handed to the multiply macros */
    addiu    t1, a2, -1
    beqz     t1, 2f             /* w == 1: single-pixel path only */
     nop
1:
    lw       t2, 0(a0)          /* t2 = first destination pixel  (a8r8g8b8) */
    lw       t3, 4(a0)          /* t3 = second destination pixel (a8r8g8b8) */
    lbu      t0, 0(a1)          /* t0 = first source byte  (a8) */
    lbu      t1, 1(a1)          /* t1 = second source byte (a8) */
    /* lbu zero-extends, so flipping the low 8 bits yields 255 - src. */
    xori     t0, t0, 0xff       /* t0 = neg source */
    xori     t1, t1, 0xff       /* t1 = neg source */

    /*
     * NOTE(review): the last three (scratch) operands alias the inputs
     * t2, t3 and t0 - this relies on the macro's internal ordering; verify
     * against its definition before changing either side.
     */
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0

    addiu    a1, a1, 2          /* source pointer: two bytes consumed */
    sw       t5, 0(a0)
    sw       t6, 4(a0)
    addiu    a2, a2, -2
    addiu    t1, a2, -1
    bgtz     t1, 1b             /* repeat while at least two pixels remain */
     addiu   a0, a0, 8          /* (delay slot) destination pointer += 2 pixels */
2:
    beqz     a2, 3f             /* no leftover pixel */
     nop
    lw       t1, 0(a0)          /* t1 = destination pixel (a8r8g8b8) */
    lbu      t0, 0(a1)          /* t0 = source byte       (a8) */
    xori     t0, t0, 0xff       /* t0 = neg source */

    MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6

    sw       t2, 0(a0)
3:
    j        ra
     nop

END(pixman_composite_out_reverse_8_8888_asm_mips)
2958
LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
/*
 * dst = saturate(dst + src * (255 - alpha(dst))), per channel, with the
 * product renormalised to 8 bits; src is a constant colour.
 *
 * a0 - dst  (a8r8g8b8), updated in place
 * a1 - src  (32bit constant)
 * a2 - w    (number of pixels)
 *
 * Whole groups of four pixels are handled by the loop at label 1; the
 * remaining 0..3 pixels go through the single-pixel loop at label 3.
 */

    beqz              a2, 5f      /* w == 0: return without touching the stack */
     nop

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
    li                t0, 0x00ff00ff  /* keeps the low byte of each halfword */
    srl               t9, a2, 2   /* t9 = number of whole 4-pixel dst groups */
    beqz              t9, 2f      /* fewer than 4 pixels: single-pixel loop only */
     nop

    /*
     * Bottom-tested loop: t9 >= 1 is guaranteed on entry by the check above,
     * so the counter is tested once per iteration, at the end.
     */
1:
    lw                t1, 0(a0)   /* t1..t4 = four destination pixels */
    lw                t2, 4(a0)
    lw                t3, 8(a0)
    lw                t4, 12(a0)

    addiu             t9, t9, -1
    addiu             a2, a2, -4

    not               t5, t1
    not               t6, t2
    not               t7, t3
    not               t8, t4
    srl               t5, t5, 24  /* t5..t8 = 255 - destination alpha */
    srl               t6, t6, 24
    srl               t7, t7, 24
    srl               t8, t8, 24
    replv.ph          t5, t5      /* replicate each factor into both halfwords */
    replv.ph          t6, t6
    replv.ph          t7, t7
    replv.ph          t8, t8
    muleu_s.ph.qbl    s0, a1, t5  /* src high/low byte pairs times factor, pixel 0 */
    muleu_s.ph.qbr    s1, a1, t5
    muleu_s.ph.qbl    s2, a1, t6  /* pixel 1 */
    muleu_s.ph.qbr    s3, a1, t6
    muleu_s.ph.qbl    s4, a1, t7  /* pixel 2 */
    muleu_s.ph.qbr    s5, a1, t7
    muleu_s.ph.qbl    s6, a1, t8  /* pixel 3 */
    muleu_s.ph.qbr    s7, a1, t8

    /* Renormalise the 16-bit products of pixels 0 and 1 to 8 bits. */
    shra_r.ph         t5, s0, 8
    shra_r.ph         t6, s1, 8
    shra_r.ph         t7, s2, 8
    shra_r.ph         t8, s3, 8
    and               t5, t5, t0
    and               t6, t6, t0
    and               t7, t7, t0
    and               t8, t8, t0
    addq.ph           s0, s0, t5
    addq.ph           s1, s1, t6
    addq.ph           s2, s2, t7
    addq.ph           s3, s3, t8
    shra_r.ph         s0, s0, 8
    shra_r.ph         s1, s1, 8
    shra_r.ph         s2, s2, 8
    shra_r.ph         s3, s3, 8
    /* Same renormalisation for pixels 2 and 3. */
    shra_r.ph         t5, s4, 8
    shra_r.ph         t6, s5, 8
    shra_r.ph         t7, s6, 8
    shra_r.ph         t8, s7, 8
    and               t5, t5, t0
    and               t6, t6, t0
    and               t7, t7, t0
    and               t8, t8, t0
    addq.ph           s4, s4, t5
    addq.ph           s5, s5, t6
    addq.ph           s6, s6, t7
    addq.ph           s7, s7, t8
    shra_r.ph         s4, s4, 8
    shra_r.ph         s5, s5, 8
    shra_r.ph         s6, s6, 8
    shra_r.ph         s7, s7, 8

    precr.qb.ph       t5, s0, s1  /* repack each pixel's four channels into bytes */
    precr.qb.ph       t6, s2, s3
    precr.qb.ph       t7, s4, s5
    precr.qb.ph       t8, s6, s7
    addu_s.qb         t5, t1, t5  /* per-byte saturating add with the destination */
    addu_s.qb         t6, t2, t6
    addu_s.qb         t7, t3, t7
    addu_s.qb         t8, t4, t8

    sw                t5, 0(a0)
    sw                t6, 4(a0)
    sw                t7, 8(a0)
    sw                t8, 12(a0)
    bnez              t9, 1b      /* more whole groups left */
     addiu            a0, a0, 16  /* (delay slot) destination pointer += 4 pixels */

2:
    beqz              a2, 4f      /* w was a multiple of 4 */
     nop
3:
    lw                t1, 0(a0)   /* t1 = destination pixel */

    not               t2, t1
    srl               t2, t2, 24  /* t2 = 255 - destination alpha */
    replv.ph          t2, t2

    muleu_s.ph.qbl    t4, a1, t2
    muleu_s.ph.qbr    t5, a1, t2
    /* Renormalise the 16-bit products to 8 bits. */
    shra_r.ph         t6, t4, 8
    shra_r.ph         t7, t5, 8

    and               t6,t6,t0
    and               t7,t7,t0

    addq.ph           t8, t4, t6
    addq.ph           t9, t5, t7

    shra_r.ph         t8, t8, 8
    shra_r.ph         t9, t9, 8

    precr.qb.ph       t9, t8, t9  /* repack the four channels into bytes */

    addu_s.qb         t9, t1, t9  /* per-byte saturating add with the destination */
    sw                t9, 0(a0)

    addiu             a2, a2, -1
    bnez              a2, 3b
     addiu            a0, a0, 4
4:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
5:
    j                 ra
     nop

END(pixman_composite_over_reverse_n_8888_asm_mips)
3094
3095LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
3096/*
 * Scales every a8 destination byte, in place, by the top byte
 * (bits 31..24) of the constant source.  The product x * a is reduced
 * with the rounding sequence  t += (t + 128) >> 8;  t = (t + 128) >> 8.
 * Loop 0: handles four pixels per iteration, loop 2: the leftover ones.
 *
3097 * a0 - dst  (a8)
3098 * a1 - src  (32bit constant)
3099 * a2 - w
3100 */
3101
3102    li                t9, 0x00ff00ff /* t9 = low-byte mask for each halfword */
3103    beqz              a2, 3f      /* nothing to do when w == 0 */
3104     nop
3105    srl               t7, a2, 2   /* t7 = how many multiples of 4 dst pixels */
3106    beqz              t7, 1f      /* branch if less than 4 dst pixels */
3107     nop
3108
3109    srl               t8, a1, 24  /* t8 = top byte of src */
3110    replv.ph          t8, t8      /* copy it into both halfwords of t8 */
3111
31120:
3113    beqz              t7, 1f      /* leave once all groups of 4 are done */
3114     addiu            t7, t7, -1  /* (delay slot) one group fewer to go */
3115    lbu               t0, 0(a0)
3116    lbu               t1, 1(a0)
3117    lbu               t2, 2(a0)
3118    lbu               t3, 3(a0)
3119
3120    precr_sra.ph.w    t1, t0, 0   /* t1 = dst[1] : dst[0] as halfwords */
3121    precr_sra.ph.w    t3, t2, 0   /* t3 = dst[3] : dst[2] as halfwords */
3122    precr.qb.ph       t0, t3, t1  /* t0 = the four dst bytes in one word */
3123
3124    muleu_s.ph.qbl    t2, t0, t8  /* t2 = upper two bytes of t0 times t8 */
3125    muleu_s.ph.qbr    t3, t0, t8  /* t3 = lower two bytes of t0 times t8 */
3126    shra_r.ph         t4, t2, 8   /* rounding shift: (t + 128) >> 8 */
3127    shra_r.ph         t5, t3, 8
3128    and               t4, t4, t9
3129    and               t5, t5, t9
3130    addq.ph           t2, t2, t4
3131    addq.ph           t3, t3, t5
3132    shra_r.ph         t2, t2, 8
3133    shra_r.ph         t3, t3, 8
3134    precr.qb.ph       t2, t2, t3  /* repack the four result bytes */
3135
3136    sb                t2, 0(a0)   /* store the bytes, lowest first */
3137    srl               t2, t2, 8
3138    sb                t2, 1(a0)
3139    srl               t2, t2, 8
3140    sb                t2, 2(a0)
3141    srl               t2, t2, 8
3142    sb                t2, 3(a0)
3143    addiu             a2, a2, -4
3144    b                 0b
3145     addiu            a0, a0, 4   /* (delay slot) advance dst */
3146
31471:
3148    beqz              a2, 3f      /* no leftover pixels */
3149     nop
3150    srl               t8, a1, 24  /* t8 = top byte of src (not replicated) */
31512:
3152    lbu               t0, 0(a0)
3153
3154    mul               t2, t0, t8  /* same rounding sequence, one pixel */
3155    shra_r.ph         t3, t2, 8
3156    andi              t3, t3, 0x00ff
3157    addq.ph           t2, t2, t3
3158    shra_r.ph         t2, t2, 8
3159
3160    sb                t2, 0(a0)
3161    addiu             a2, a2, -1
3162    bnez              a2, 2b
3163     addiu            a0, a0, 1   /* (delay slot) advance dst */
3164
31653:
3166    j                 ra
3167     nop
3168
3169END(pixman_composite_in_n_8_asm_mips)
3170
3171LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
3172/*
 * Nearest-neighbour scaled scanline.  For each destination pixel the
 * source pixel at index (vx >> 16) is fetched, vx is advanced by unit_x,
 * and the pair is combined by the OVER_* macros (defined outside this
 * section).  Loop 1: handles two pixels per iteration; block 2: handles
 * a single remaining pixel.
 *
3173 * a0     - dst  (a8r8g8b8)
3174 * a1     - src  (a8r8g8b8)
3175 * a2     - w
3176 * a3     - vx
3177 * 16(sp) - unit_x
3178 */
3179
3180    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 /* offset 0: unit_x is still read at 16(sp) below */
3181    lw       t8, 16(sp) /* t8 = unit_x */
3182    li       t6, 0x00ff00ff /* constant handed to the OVER_* macros */
3183    beqz     a2, 3f     /* w == 0: just restore and return */
3184     nop
3185    addiu    t1, a2, -1
3186    beqz     t1, 2f     /* w == 1: only the single-pixel path */
3187     nop
31881:
3189    sra      t0, a3, 16 /* t0 = vx >> 16 */
3190    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
3191    addu     t0, a1, t0
3192    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
3193    addu     a3, a3, t8 /* a3 = vx + unit_x */
3194
3195    sra      t1, a3, 16 /* t1 = vx >> 16 */
3196    sll      t1, t1, 2  /* t1 = t1 * 4 (a8r8g8b8) */
3197    addu     t1, a1, t1
3198    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
3199    addu     a3, a3, t8 /* a3 = vx + unit_x */
3200
3201    lw       t2, 0(a0)  /* t2 = destination (a8r8g8b8) */
3202    lw       t3, 4(a0)  /* t3 = destination (a8r8g8b8) */
3203
3204    OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 /* results are stored from t4/t5 */
3205
3206    sw       t4, 0(a0)
3207    sw       t5, 4(a0)
3208    addiu    a2, a2, -2
3209    addiu    t1, a2, -1
3210    bgtz     t1, 1b     /* loop while at least 2 pixels remain */
3211     addiu   a0, a0, 8  /* (delay slot) advance dst by 2 pixels */
32122:
3213    beqz     a2, 3f     /* no odd pixel left */
3214     nop
3215    sra      t0, a3, 16 /* t0 = vx >> 16 */
3216    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
3217    addu     t0, a1, t0
3218    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
3219    lw       t1, 0(a0)  /* t1 = destination (a8r8g8b8) */
3220    addu     a3, a3, t8 /* a3 = vx + unit_x */
3221
3222    OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 /* result is stored from t2 */
3223
3224    sw       t2, 0(a0)
32253:
3226    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
3227    j        ra
3228     nop
3229
3230END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
3231
3232LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
3233/*
 * Nearest-neighbour scaled scanline with an r5g6b5 destination.  The
 * source pixel at index (vx >> 16) is fetched, the destination is widened
 * with CONVERT_*0565_TO_*8888, combined with OVER_*, and narrowed back
 * with CONVERT_*8888_TO_*0565 (all macros are defined outside this
 * section).  Loop 1: handles two pixels per iteration; block 2: handles
 * a single remaining pixel.
 *
 * The stack offsets below are the values on entry; the code reads
 * unit_x at 40(sp) after SAVE_REGS_ON_STACK 24.
 *
3234 * a0     - dst  (r5g6b5)
3235 * a1     - src  (a8r8g8b8)
3236 * a2     - w
3237 * a3     - vx
3238 * 16(sp) - unit_x
3239 */
3240
3241    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1
3242    lw       t8, 40(sp) /* t8 = unit_x */
3243    li       t4, 0x00ff00ff /* constant handed to the OVER_* macros */
3244    li       t5, 0xf800f800 /* 565 field masks: red (2 pixels) */
3245    li       t6, 0x07e007e0 /* green */
3246    li       t7, 0x001F001F /* blue */
3247    beqz     a2, 3f     /* w == 0: just restore and return */
3248     nop
3249    addiu    t1, a2, -1
3250    beqz     t1, 2f     /* w == 1: only the single-pixel path */
3251     nop
32521:
3253    sra      t0, a3, 16 /* t0 = vx >> 16 */
3254    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
3255    addu     t0, a1, t0
3256    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
3257    addu     a3, a3, t8 /* a3 = vx + unit_x */
3258    sra      t1, a3, 16 /* t1 = vx >> 16 */
3259    sll      t1, t1, 2  /* t1 = t1 * 4 (a8r8g8b8) */
3260    addu     t1, a1, t1
3261    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
3262    addu     a3, a3, t8 /* a3 = vx + unit_x */
3263    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
3264    lhu      t3, 2(a0)  /* t3 = destination (r5g6b5) */
3265
3266    CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3
3267    OVER_2x8888_2x8888       t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4
3268    CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 /* results are stored from v0/v1 */
3269
3270    sh       v0, 0(a0)
3271    sh       v1, 2(a0)
3272    addiu    a2, a2, -2
3273    addiu    t1, a2, -1
3274    bgtz     t1, 1b     /* loop while at least 2 pixels remain */
3275     addiu   a0, a0, 4  /* (delay slot) advance dst by 2 pixels */
32762:
3277    beqz     a2, 3f     /* no odd pixel left */
3278     nop
3279    sra      t0, a3, 16 /* t0 = vx >> 16 */
3280    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
3281    addu     t0, a1, t0
3282    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
3283    lhu      t1, 0(a0)  /* t1 = destination (r5g6b5) */
3284    addu     a3, a3, t8 /* a3 = vx + unit_x */
3285
3286    CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 /* t5/t6 are passed as macro operands here; the loop that needed them as masks is finished */
3287    OVER_8888_8888           t0, t2, t1, t4, t3, t5, t6, t7
3288    CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 /* result is stored from t2 */
3289
3290    sh       t2, 0(a0)
32913:
3292    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1
3293    j        ra
3294     nop
3295
3296END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
3297
3298LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
3299/*
 * Nearest-neighbour scaled scanline that copies (SRC) r5g6b5 source
 * pixels into an a8r8g8b8 destination.  The source halfword at index
 * (vx >> 16) is fetched and widened with the CONVERT_*0565_TO_*8888
 * macros (defined outside this section).  Loop 1: handles two pixels
 * per iteration; block 2: handles a single remaining pixel.
 *
3300 * a0     - dst (a8r8g8b8)
3301 * a1     - src (r5g6b5)
3302 * a2     - w
3303 * a3     - vx
3304 * 16(sp) - unit_x
3305 */
3306
3307    SAVE_REGS_ON_STACK 0, v0 /* offset 0: unit_x is still read at 16(sp) below */
3308    beqz     a2, 3f     /* w == 0: just restore and return */
3309     nop
3310
3311    lw       v0, 16(sp) /* v0 = unit_x */
3312    addiu    t1, a2, -1
3313    beqz     t1, 2f     /* w == 1: only the single-pixel path */
3314     nop
3315
3316    li       t4, 0x07e007e0 /* 565 green mask (2 pixels), used by loop 1: only */
3317    li       t5, 0x001F001F /* 565 blue mask  (2 pixels), used by loop 1: only */
33181:
3319    sra      t0, a3, 16 /* t0 = vx >> 16 */
3320    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
3321    addu     t0, a1, t0
3322    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
3323    addu     a3, a3, v0 /* a3 = vx + unit_x */
3324    sra      t1, a3, 16 /* t1 = vx >> 16 */
3325    sll      t1, t1, 1  /* t1 = t1 * 2 ((r5g6b5)) */
3326    addu     t1, a1, t1
3327    lhu      t1, 0(t1)  /* t1 = source ((r5g6b5)) */
3328    addu     a3, a3, v0 /* a3 = vx + unit_x */
3329    addiu    a2, a2, -2
3330
3331    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 /* results are stored from t2/t3 */
3332
3333    sw       t2, 0(a0)
3334    sw       t3, 4(a0)
3335
3336    addiu    t2, a2, -1
3337    bgtz     t2, 1b     /* loop while at least 2 pixels remain */
3338     addiu   a0, a0, 8  /* (delay slot) advance dst by 2 pixels */
33392:
3340    beqz     a2, 3f     /* no odd pixel left */
3341     nop
3342    sra      t0, a3, 16 /* t0 = vx >> 16 */
3343    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
3344    addu     t0, a1, t0
3345    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
3346
3347    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 /* result is stored from t1 */
3348
3349    sw       t1, 0(a0)
33503:
3351    RESTORE_REGS_FROM_STACK 0, v0
3352    j        ra
3353     nop
3354
3355END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
3356
3357LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
3358/*
 * Nearest-neighbour scaled scanline with an a8 mask and an r5g6b5
 * destination.  The source pixel at index (vx >> 16) is fetched, the
 * destination is widened to 8888, combined with the source and mask by
 * the OVER_*_8_* macros, and narrowed back to 565 (all macros are defined
 * outside this section).  Loop 1: handles two pixels per iteration;
 * block 2: handles a single remaining pixel.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 20 bytes higher after SAVE_REGS_ON_STACK 20.
 *
3359 * a0     - dst  (r5g6b5)
3360 * a1     - src  (a8r8g8b8)
3361 * a2     - mask (a8)
3362 * a3     - w
3363 * 16(sp) - vx
3364 * 20(sp) - unit_x
3365 */
3366    beqz     a3, 4f     /* w == 0: return without touching the stack */
3367     nop
3368
3369    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
3370    lw       v0, 36(sp) /* v0 = vx */
3371    lw       v1, 40(sp) /* v1 = unit_x */
3372    li       t6, 0x00ff00ff /* constant handed to the OVER_* macros */
3373    li       t7, 0xf800f800 /* 565 field masks: red (2 pixels) */
3374    li       t8, 0x07e007e0 /* green */
3375    li       t9, 0x001F001F /* blue */
3376
3377    addiu    t1, a3, -1
3378    beqz     t1, 2f     /* w == 1: only the single-pixel path */
3379     nop
33801:
3381    sra      t0, v0, 16 /* t0 = vx >> 16 */
3382    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
3383    addu     t0, a1, t0
3384    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
3385    addu     v0, v0, v1 /* v0 = vx + unit_x */
3386    sra      t1, v0, 16 /* t1 = vx >> 16 */
3387    sll      t1, t1, 2  /* t1 = t1 * 4      (a8r8g8b8) */
3388    addu     t1, a1, t1
3389    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
3390    addu     v0, v0, v1 /* v0 = vx + unit_x */
3391    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
3392    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
3393    lhu      t4, 0(a0)  /* t4 = destination (r5g6b5) */
3394    lhu      t5, 2(a0)  /* t5 = destination (r5g6b5) */
3395    addiu    a2, a2, 2  /* advance mask by 2 pixels */
3396
3397    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
3398    OVER_2x8888_2x8_2x8888   t0, t1, \
3399                             t2, t3, \
3400                             s0, s1, \
3401                             t4, t5, \
3402                             t6, s2, s3, s4, s5, t2, t3
3403    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 /* results are stored from s0/s1 */
3404
3405    sh       s0, 0(a0)
3406    sh       s1, 2(a0)
3407    addiu    a3, a3, -2
3408    addiu    t1, a3, -1
3409    bgtz     t1, 1b     /* loop while at least 2 pixels remain */
3410     addiu   a0, a0, 4  /* (delay slot) advance dst by 2 pixels */
34112:
3412    beqz     a3, 3f     /* no odd pixel left */
3413     nop
3414    sra      t0, v0, 16 /* t0 = vx >> 16 */
3415    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
3416    addu     t0, a1, t0
3417    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
3418    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
3419    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
3420
3421    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
3422    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
3423    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 /* result is stored from t3 */
3424
3425    sh       t3, 0(a0)
34263:
3427    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
34284:
3429    j        ra
3430     nop
3431
3432END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
3433
3434LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
3435/*
 * Nearest-neighbour scaled scanline with r5g6b5 source and destination
 * and an a8 mask.  The source halfword at index (vx >> 16) is fetched;
 * source and destination are both widened to 8888, combined with the
 * mask by the OVER_*_8_* macros, and the result is narrowed back to 565
 * (all macros are defined outside this section).  Loop 1: handles two
 * pixels per iteration; block 2: handles a single remaining pixel.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 20 bytes higher after SAVE_REGS_ON_STACK 20.
 *
3436 * a0     - dst  (r5g6b5)
3437 * a1     - src  (r5g6b5)
3438 * a2     - mask (a8)
3439 * a3     - w
3440 * 16(sp) - vx
3441 * 20(sp) - unit_x
3442 */
3443
3444    beqz     a3, 4f     /* w == 0: return without touching the stack */
3445     nop
3446    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
3447    lw       v0, 36(sp) /* v0 = vx */
3448    lw       v1, 40(sp) /* v1 = unit_x */
3449    li       t4, 0xf800f800 /* 565 field masks: red (2 pixels) */
3450    li       t5, 0x07e007e0 /* green */
3451    li       t6, 0x001F001F /* blue */
3452    li       t7, 0x00ff00ff /* constant handed to the OVER_* macros */
3453
3454    addiu    t1, a3, -1
3455    beqz     t1, 2f     /* w == 1: only the single-pixel path */
3456     nop
34571:
3458    sra      t0, v0, 16 /* t0 = vx >> 16 */
3459    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
3460    addu     t0, a1, t0
3461    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
3462    addu     v0, v0, v1 /* v0 = vx + unit_x */
3463    sra      t1, v0, 16 /* t1 = vx >> 16 */
3464    sll      t1, t1, 1  /* t1 = t1 * 2      (r5g6b5) */
3465    addu     t1, a1, t1
3466    lhu      t1, 0(t1)  /* t1 = source      (r5g6b5) */
3467    addu     v0, v0, v1 /* v0 = vx + unit_x */
3468    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
3469    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
3470    lhu      t8, 0(a0)  /* t8 = destination (r5g6b5) */
3471    lhu      t9, 2(a0)  /* t9 = destination (r5g6b5) */
3472    addiu    a2, a2, 2  /* advance mask by 2 pixels */
3473
3474    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 /* source pair      -> s0/s1 */
3475    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 /* destination pair -> s2/s3 */
3476    OVER_2x8888_2x8_2x8888   s0, s1, \
3477                             t2, t3, \
3478                             s2, s3, \
3479                             t0, t1, \
3480                             t7, t8, t9, s4, s5, s0, s1
3481    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 /* results are stored from s0/s1 */
3482
3483    sh       s0, 0(a0)
3484    sh       s1, 2(a0)
3485    addiu    a3, a3, -2
3486    addiu    t1, a3, -1
3487    bgtz     t1, 1b     /* loop while at least 2 pixels remain */
3488     addiu   a0, a0, 4  /* (delay slot) advance dst by 2 pixels */
34892:
3490    beqz     a3, 3f     /* no odd pixel left */
3491     nop
3492    sra      t0, v0, 16 /* t0 = vx >> 16 */
3493    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
3494    addu     t0, a1, t0
3495
3496    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
3497    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
3498    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
3499
3500    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 /* source      -> t3 */
3501    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 /* destination -> t4 */
3502    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
3503    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 /* result is stored from t3 */
3504
3505    sh       t3, 0(a0)
35063:
3507    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
35084:
3509    j        ra
3510     nop
3511
3512END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
3513
3514LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
3515/*
 * Bilinear scaled scanline, one destination pixel per loop iteration.
 * For each pixel the four neighbours (top/bottom row, index vx >> 16 and
 * the next one) are loaded, four weights are formed from wt, wb and the
 * fractional part of vx, and BILINEAR_INTERPOLATE_SINGLE_PIXEL (defined
 * outside this section) blends them; the result is stored from t0.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 20 bytes higher after SAVE_REGS_ON_STACK 20.
 *
3516 * a0     - *dst
3517 * a1     - *src_top
3518 * a2     - *src_bottom
3519 * a3     - w
3520 * 16(sp) - wt
3521 * 20(sp) - wb
3522 * 24(sp) - vx
3523 * 28(sp) - unit_x
3524 */
3525
3526    beqz     a3, 1f         /* w == 0: return without touching the stack */
3527     nop
3528
3529    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
3530
3531    lw       s0, 36(sp)     /* s0 = wt */
3532    lw       s1, 40(sp)     /* s1 = wb */
3533    lw       s2, 44(sp)     /* s2 = vx */
3534    lw       s3, 48(sp)     /* s3 = unit_x */
3535    li       v0, BILINEAR_INTERPOLATION_RANGE
3536
3537    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3538    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
35390:
3540    andi     t4, s2, 0xffff /* t4 = (short)vx */
3541    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3542    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3543
3544    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
3545    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
3546    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
3547    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
3548
3549    sra      t9, s2, 16     /* t9 = integer pixel index */
3550    sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
3551    addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
3552    lwx      t0, t9(a1)     /* t0 = tl */
3553    lwx      t1, t8(a1)     /* t1 = tr */
3554    addiu    a3, a3, -1
3555    lwx      t2, t9(a2)     /* t2 = bl */
3556    lwx      t3, t8(a2)     /* t3 = br */
3557
3558    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3559
3560    addu     s2, s2, s3     /* vx += unit_x; */
3561    sw       t0, 0(a0)
3562    bnez     a3, 0b
3563     addiu   a0, a0, 4      /* (delay slot) advance dst */
3564
3565    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
35661:
3567    j        ra
3568     nop
3569
3570END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
3571
3572LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
3573/*
 * Bilinear scaled scanline, 32-bit source rows to a 16-bit destination,
 * one pixel per loop iteration.  The four neighbours are blended by
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL and the result is narrowed with
 * CONVERT_1x8888_TO_1x0565 (both defined outside this section) before
 * being stored as a halfword from t1.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 20 bytes higher after SAVE_REGS_ON_STACK 20.
 *
3574 * a0     - *dst
3575 * a1     - *src_top
3576 * a2     - *src_bottom
3577 * a3     - w
3578 * 16(sp) - wt
3579 * 20(sp) - wb
3580 * 24(sp) - vx
3581 * 28(sp) - unit_x
3582 */
3583
3584    beqz     a3, 1f         /* w == 0: return without touching the stack */
3585     nop
3586
3587    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
3588
3589    lw       s0, 36(sp)     /* s0 = wt */
3590    lw       s1, 40(sp)     /* s1 = wb */
3591    lw       s2, 44(sp)     /* s2 = vx */
3592    lw       s3, 48(sp)     /* s3 = unit_x */
3593    li       v0, BILINEAR_INTERPOLATION_RANGE
3594
3595    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3596    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
35970:
3598    andi     t4, s2, 0xffff /* t4 = (short)vx */
3599    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3600    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3601
3602    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
3603    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
3604    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
3605    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
3606
3607    sra      t9, s2, 16     /* t9 = integer pixel index */
3608    sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
3609    addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
3610    lwx      t0, t9(a1)     /* t0 = tl */
3611    lwx      t1, t8(a1)     /* t1 = tr */
3612    addiu    a3, a3, -1
3613    lwx      t2, t9(a2)     /* t2 = bl */
3614    lwx      t3, t8(a2)     /* t3 = br */
3615
3616    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3617    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 /* result is stored from t1 */
3618
3619    addu     s2, s2, s3     /* vx += unit_x; */
3620    sh       t1, 0(a0)
3621    bnez     a3, 0b
3622     addiu   a0, a0, 2      /* (delay slot) advance dst */
3623
3624    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
36251:
3626    j        ra
3627     nop
3628
3629END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
3630
3631LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
3632/*
 * Bilinear scaled scanline, 16-bit source rows to a 32-bit destination,
 * one pixel per loop iteration.  The four neighbouring halfwords are
 * widened pairwise with CONVERT_2x0565_TO_2x8888, blended by
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL (both defined outside this section)
 * and the result is stored as a word from t0.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 28 bytes higher after SAVE_REGS_ON_STACK 28.
 *
3633 * a0     - *dst
3634 * a1     - *src_top
3635 * a2     - *src_bottom
3636 * a3     - w
3637 * 16(sp) - wt
3638 * 20(sp) - wb
3639 * 24(sp) - vx
3640 * 28(sp) - unit_x
3641 */
3642
3643    beqz     a3, 1f         /* w == 0: return without touching the stack */
3644     nop
3645
3646    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
3647
3648    lw       s0, 44(sp)     /* s0 = wt */
3649    lw       s1, 48(sp)     /* s1 = wb */
3650    lw       s2, 52(sp)     /* s2 = vx */
3651    lw       s3, 56(sp)     /* s3 = unit_x */
3652    li       v0, BILINEAR_INTERPOLATION_RANGE
3653    li       v1, 0x07e007e0 /* 565 green mask (2 pixels) */
3654    li       s8, 0x001f001f /* 565 blue mask  (2 pixels) */
3655
3656    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3657    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
36580:
3659    andi     t4, s2, 0xffff /* t4 = (short)vx */
3660    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3661    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3662
3663    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
3664    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
3665    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
3666    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
3667
3668    sra      t9, s2, 16     /* t9 = integer pixel index */
3669    sll      t9, t9, 1      /* t9 = byte offset of the left pixel */
3670    addiu    t8, t9, 2      /* t8 = byte offset of the right pixel */
3671    lhx      t0, t9(a1)     /* t0 = tl */
3672    lhx      t1, t8(a1)     /* t1 = tr */
3673    andi     t1, t1, 0xffff /* lhx sign-extends; keep only the 16 pixel bits */
3674    addiu    a3, a3, -1
3675    lhx      t2, t9(a2)     /* t2 = bl */
3676    lhx      t3, t8(a2)     /* t3 = br */
3677    andi     t3, t3, 0xffff /* lhx sign-extends; keep only the 16 pixel bits */
3678
3679    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 /* top pair, converted in place */
3680    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 /* bottom pair, converted in place */
3681    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3682
3683    addu     s2, s2, s3     /* vx += unit_x; */
3684    sw       t0, 0(a0)
3685    bnez     a3, 0b
3686     addiu   a0, a0, 4      /* (delay slot) advance dst */
3687
3688    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
36891:
3690    j        ra
3691     nop
3692
3693END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
3694
3695LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
3696/*
 * Bilinear scaled scanline, 16-bit source rows to a 16-bit destination,
 * one pixel per loop iteration.  The four neighbouring halfwords are
 * widened pairwise with CONVERT_2x0565_TO_2x8888, blended by
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, narrowed again with
 * CONVERT_1x8888_TO_1x0565 (all defined outside this section) and the
 * result is stored as a halfword from t1.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 28 bytes higher after SAVE_REGS_ON_STACK 28.
 *
3697 * a0     - *dst
3698 * a1     - *src_top
3699 * a2     - *src_bottom
3700 * a3     - w
3701 * 16(sp) - wt
3702 * 20(sp) - wb
3703 * 24(sp) - vx
3704 * 28(sp) - unit_x
3705 */
3706
3707    beqz     a3, 1f         /* w == 0: return without touching the stack */
3708     nop
3709
3710    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
3711
3712    lw       s0, 44(sp)     /* s0 = wt */
3713    lw       s1, 48(sp)     /* s1 = wb */
3714    lw       s2, 52(sp)     /* s2 = vx */
3715    lw       s3, 56(sp)     /* s3 = unit_x */
3716    li       v0, BILINEAR_INTERPOLATION_RANGE
3717    li       v1, 0x07e007e0 /* 565 green mask (2 pixels) */
3718    li       s8, 0x001f001f /* 565 blue mask  (2 pixels) */
3719
3720    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3721    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
37220:
3723    andi     t4, s2, 0xffff /* t4 = (short)vx */
3724    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3725    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3726
3727    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
3728    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
3729    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
3730    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
3731
3732    sra      t9, s2, 16     /* t9 = integer pixel index */
3733    sll      t9, t9, 1      /* t9 = byte offset of the left pixel */
3734    addiu    t8, t9, 2      /* t8 = byte offset of the right pixel */
3735    lhx      t0, t9(a1)     /* t0 = tl */
3736    lhx      t1, t8(a1)     /* t1 = tr */
3737    andi     t1, t1, 0xffff /* lhx sign-extends; keep only the 16 pixel bits */
3738    addiu    a3, a3, -1
3739    lhx      t2, t9(a2)     /* t2 = bl */
3740    lhx      t3, t8(a2)     /* t3 = br */
3741    andi     t3, t3, 0xffff /* lhx sign-extends; keep only the 16 pixel bits */
3742
3743    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 /* top pair, converted in place */
3744    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 /* bottom pair, converted in place */
3745    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3746    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 /* result is stored from t1 */
3747
3748    addu     s2, s2, s3     /* vx += unit_x; */
3749    sh       t1, 0(a0)
3750    bnez     a3, 0b
3751     addiu   a0, a0, 2      /* (delay slot) advance dst */
3752
3753    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
37541:
3755    j        ra
3756     nop
3757
3758END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
3759
3760LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
3761/*
 * Bilinear scaled scanline combined with the destination, one pixel per
 * loop iteration.  The four neighbours are blended by
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, the current destination word is
 * loaded, and OVER_8888_8888 (both macros defined outside this section)
 * combines them; the result is stored from t2.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 24 bytes higher after SAVE_REGS_ON_STACK 24.
 *
3762 * a0     - *dst
3763 * a1     - *src_top
3764 * a2     - *src_bottom
3765 * a3     - w
3766 * 16(sp) - wt
3767 * 20(sp) - wb
3768 * 24(sp) - vx
3769 * 28(sp) - unit_x
3770 */
3771
3772    beqz     a3, 1f         /* w == 0: return without touching the stack */
3773     nop
3774
3775    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
3776
3777    lw       s0, 40(sp)     /* s0 = wt */
3778    lw       s1, 44(sp)     /* s1 = wb */
3779    lw       s2, 48(sp)     /* s2 = vx */
3780    lw       s3, 52(sp)     /* s3 = unit_x */
3781    li       v0, BILINEAR_INTERPOLATION_RANGE
3782    li       s8, 0x00ff00ff /* constant handed to OVER_8888_8888 */
3783
3784    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3785    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
37860:
3787    andi     t4, s2, 0xffff /* t4 = (short)vx */
3788    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3789    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3790
3791    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
3792    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
3793    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
3794    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
3795
3796    sra      t9, s2, 16     /* t9 = integer pixel index */
3797    sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
3798    addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
3799    lwx      t0, t9(a1)     /* t0 = tl */
3800    lwx      t1, t8(a1)     /* t1 = tr */
3801    addiu    a3, a3, -1
3802    lwx      t2, t9(a2)     /* t2 = bl */
3803    lwx      t3, t8(a2)     /* t3 = br */
3804
3805    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3806    lw       t1, 0(a0)      /* t1 = dest */
3807    OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6 /* result is stored from t2 */
3808
3809    addu     s2, s2, s3     /* vx += unit_x; */
3810    sw       t2, 0(a0)
3811    bnez     a3, 0b
3812     addiu   a0, a0, 4      /* (delay slot) advance dst */
3813
3814    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
38151:
3816    j        ra
3817     nop
3818
3819END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
3820
3821LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
3822/*
 * Bilinear scaled scanline added to the destination, one pixel per loop
 * iteration.  The four neighbours are blended by
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL (defined outside this section) and
 * the result is added to the current destination word with a per-byte
 * unsigned saturating add (addu_s.qb) before being written back.
 *
 * The stack offsets below are the values on entry; the code reads them
 * 20 bytes higher after SAVE_REGS_ON_STACK 20.
 *
3823 * a0     - *dst
3824 * a1     - *src_top
3825 * a2     - *src_bottom
3826 * a3     - w
3827 * 16(sp) - wt
3828 * 20(sp) - wb
3829 * 24(sp) - vx
3830 * 28(sp) - unit_x
3831 */
3832
3833    beqz         a3, 1f         /* w == 0: return without touching the stack */
3834     nop
3835
3836    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
3837
3838    lw           s0, 36(sp)     /* s0 = wt */
3839    lw           s1, 40(sp)     /* s1 = wb */
3840    lw           s2, 44(sp)     /* s2 = vx */
3841    lw           s3, 48(sp)     /* s3 = unit_x */
3842    li           v0, BILINEAR_INTERPOLATION_RANGE
3843
3844    sll          s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3845    sll          s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
38460:
3847    andi         t4, s2, 0xffff /* t4 = (short)vx */
3848    srl          t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3849    subu         t5, v0, t4     /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3850
3851    mul          s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
3852    mul          s5, s0, t4     /* s5 = wt*(vx>>8) */
3853    mul          s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
3854    mul          s7, s1, t4     /* s7 = wb*(vx>>8) */
3855
3856    sra          t9, s2, 16     /* t9 = integer pixel index */
3857    sll          t9, t9, 2      /* t9 = byte offset of the left pixel */
3858    addiu        t8, t9, 4      /* t8 = byte offset of the right pixel */
3859    lwx          t0, t9(a1)     /* t0 = tl */
3860    lwx          t1, t8(a1)     /* t1 = tr */
3861    addiu        a3, a3, -1
3862    lwx          t2, t9(a2)     /* t2 = bl */
3863    lwx          t3, t8(a2)     /* t3 = br */
3864
3865    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3866    lw           t1, 0(a0)      /* t1 = dest */
3867    addu_s.qb    t2, t0, t1     /* t2 = per-byte saturating sum of t0 and dest */
3868
3869    addu         s2, s2, s3     /* vx += unit_x; */
3870    sw           t2, 0(a0)
3871    bnez         a3, 0b
3872     addiu       a0, a0, 4      /* (delay slot) advance dst */
3873
3874    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
38751:
3876    j            ra
3877     nop
3878
3879END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
3880
3881LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
3882/*
 * Masked bilinear scaled scanline, one pixel per loop iteration.  The
 * four neighbours are blended by BILINEAR_INTERPOLATE_SINGLE_PIXEL, one
 * mask byte is fetched, and MIPS_UN8x4_MUL_UN8 (both macros defined
 * outside this section) combines them; the result is stored from t0.
 * The pixel count w is kept in v1.
 *
 * The stack offsets below are the values on entry; after
 * SAVE_REGS_ON_STACK 28 the code reads them 28 bytes higher.
 *
3883 * a0     - *dst
3884 * a1     - *mask
3885 * a2     - *src_top
3886 * a3     - *src_bottom
3887 * 16(sp) - wt
3888 * 20(sp) - wb
3889 * 24(sp) - vx
3890 * 28(sp) - unit_x
3891 * 32(sp) - w
3892 */
3893
3894    lw       v1, 32(sp)        /* v1 = w (read before sp is moved) */
3895    beqz     v1, 1f            /* w == 0: return without touching the stack */
3896     nop
3897
3898    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
3899
3900    lw       s0, 44(sp)        /* s0 = wt */
3901    lw       s1, 48(sp)        /* s1 = wb */
3902    lw       s2, 52(sp)        /* s2 = vx */
3903    lw       s3, 56(sp)        /* s3 = unit_x */
3904    li       v0, BILINEAR_INTERPOLATION_RANGE
3905    li       s8, 0x00ff00ff    /* constant handed to MIPS_UN8x4_MUL_UN8 */
3906
3907    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3908    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
39090:
3910    andi     t4, s2, 0xffff    /* t4 = (short)vx */
3911    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3912    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3913
3914    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
3915    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
3916    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
3917    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
3918
3919    sra      t9, s2, 16        /* t9 = integer pixel index */
3920    sll      t9, t9, 2         /* t9 = byte offset of the left pixel */
3921    addiu    t8, t9, 4         /* t8 = byte offset of the right pixel */
3922    lwx      t0, t9(a2)        /* t0 = tl */
3923    lwx      t1, t8(a2)        /* t1 = tr */
3924    addiu    v1, v1, -1        /* one pixel fewer to go */
3925    lwx      t2, t9(a3)        /* t2 = bl */
3926    lwx      t3, t8(a3)        /* t3 = br */
3927
3928    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3929    lbu      t1, 0(a1)         /* t1 = mask */
3930    addiu    a1, a1, 1
3931    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 /* result is stored from t0 */
3932
3933    addu     s2, s2, s3        /* vx += unit_x; */
3934    sw       t0, 0(a0)
3935    bnez     v1, 0b
3936     addiu   a0, a0, 4         /* (delay slot) advance dst */
3937
3938    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
39391:
3940    j        ra
3941     nop
3942
3943END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
3944
3945LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
3946/*
 * Masked bilinear scaled scanline with a 16-bit destination, one pixel
 * per loop iteration.  The four neighbours are blended by
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, combined with one mask byte by
 * MIPS_UN8x4_MUL_UN8, and narrowed with CONVERT_1x8888_TO_1x0565 (all
 * macros defined outside this section); the result is stored as a
 * halfword from t1.  The pixel count w is kept in v1.
 *
 * The stack offsets below are the values on entry; after
 * SAVE_REGS_ON_STACK 28 the code reads them 28 bytes higher.
 *
3947 * a0     - *dst
3948 * a1     - *mask
3949 * a2     - *src_top
3950 * a3     - *src_bottom
3951 * 16(sp) - wt
3952 * 20(sp) - wb
3953 * 24(sp) - vx
3954 * 28(sp) - unit_x
3955 * 32(sp) - w
3956 */
3957
3958    lw       v1, 32(sp)        /* v1 = w (read before sp is moved) */
3959    beqz     v1, 1f            /* w == 0: return without touching the stack */
3960     nop
3961
3962    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
3963
3964    lw       s0, 44(sp)        /* s0 = wt */
3965    lw       s1, 48(sp)        /* s1 = wb */
3966    lw       s2, 52(sp)        /* s2 = vx */
3967    lw       s3, 56(sp)        /* s3 = unit_x */
3968    li       v0, BILINEAR_INTERPOLATION_RANGE
3969    li       s8, 0x00ff00ff    /* constant handed to MIPS_UN8x4_MUL_UN8 */
3970
3971    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
3972    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
39730:
3974    andi     t4, s2, 0xffff    /* t4 = (short)vx */
3975    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
3976    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
3977
3978    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
3979    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
3980    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
3981    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
3982
3983    sra      t9, s2, 16        /* t9 = integer pixel index */
3984    sll      t9, t9, 2         /* t9 = byte offset of the left pixel */
3985    addiu    t8, t9, 4         /* t8 = byte offset of the right pixel */
3986    lwx      t0, t9(a2)        /* t0 = tl */
3987    lwx      t1, t8(a2)        /* t1 = tr */
3988    addiu    v1, v1, -1        /* one pixel fewer to go */
3989    lwx      t2, t9(a3)        /* t2 = bl */
3990    lwx      t3, t8(a3)        /* t3 = br */
3991
3992    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
3993    lbu      t1, 0(a1)         /* t1 = mask */
3994    addiu    a1, a1, 1
3995    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
3996    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 /* result is stored from t1 */
3997
3998    addu     s2, s2, s3        /* vx += unit_x; */
3999    sh       t1, 0(a0)
4000    bnez     v1, 0b
4001     addiu   a0, a0, 2         /* (delay slot) advance dst */
4002
4003    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
40041:
4005    j        ra
4006     nop
4007
4008END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
4009
4010LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
4011/*
 * Masked bilinear scaled scanline, 16-bit source rows to a 32-bit
 * destination, one pixel per loop iteration.  The four neighbouring
 * halfwords are widened pairwise with CONVERT_2x0565_TO_2x8888, blended
 * by BILINEAR_INTERPOLATE_SINGLE_PIXEL and combined with one mask byte
 * by MIPS_UN8x4_MUL_UN8 (all macros defined outside this section); the
 * result is stored from t0.
 *
 * ra is saved with the other registers, reused as the pixel counter and
 * restored before the final jump.  v0 holds the 0x00ff00ff constant, so
 * BILINEAR_INTERPOLATION_RANGE is reloaded into t5 on every iteration.
 *
 * The stack offsets below are the values on entry; after
 * SAVE_REGS_ON_STACK 32 the code reads them 32 bytes higher.
 *
4012 * a0     - *dst
4013 * a1     - *mask
4014 * a2     - *src_top
4015 * a3     - *src_bottom
4016 * 16(sp) - wt
4017 * 20(sp) - wb
4018 * 24(sp) - vx
4019 * 28(sp) - unit_x
4020 * 32(sp) - w
4021 */
4022
4023    lw       t0, 32(sp)        /* t0 = w, only used for the zero check */
4024    beqz     t0, 1f            /* w == 0: return without touching the stack */
4025     nop
4026
4027    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
4028
4029    lw       s0, 48(sp)        /* s0 = wt */
4030    lw       s1, 52(sp)        /* s1 = wb */
4031    lw       s2, 56(sp)        /* s2 = vx */
4032    lw       s3, 60(sp)        /* s3 = unit_x */
4033    lw       ra, 64(sp)        /* ra = w */
4034    li       v0, 0x00ff00ff    /* constant handed to MIPS_UN8x4_MUL_UN8 */
4035    li       v1, 0x07e007e0    /* 565 green mask (2 pixels) */
4036    li       s8, 0x001f001f    /* 565 blue mask  (2 pixels) */
4037
4038    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wt (no shift when BITS == 8) */
4039    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-scale wb likewise */
40400:
4041    andi     t4, s2, 0xffff    /* t4 = (short)vx */
4042    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 (for BILINEAR_INTERPOLATION_BITS == 8) */
4043    li       t5, BILINEAR_INTERPOLATION_RANGE
4044    subu     t5, t5, t4        /* t5 = ( 256 - (vx>>8)), i.e. RANGE - t4 */
4045
4046    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
4047    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
4048    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
4049    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
4050
4051    sra      t9, s2, 16        /* t9 = integer pixel index */
4052    sll      t9, t9, 1         /* t9 = byte offset of the left pixel */
4053    addiu    t8, t9, 2         /* t8 = byte offset of the right pixel */
4054    lhx      t0, t9(a2)        /* t0 = tl */
4055    lhx      t1, t8(a2)        /* t1 = tr */
4056    andi     t1, t1, 0xffff    /* lhx sign-extends; keep only the 16 pixel bits */
4057    addiu    ra, ra, -1        /* one pixel fewer to go */
4058    lhx      t2, t9(a3)        /* t2 = bl */
4059    lhx      t3, t8(a3)        /* t3 = br */
4060    andi     t3, t3, 0xffff    /* lhx sign-extends; keep only the 16 pixel bits */
4061
4062    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 /* top pair, converted in place */
4063    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 /* bottom pair, converted in place */
4064    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
4065    lbu      t1, 0(a1)         /* t1 = mask */
4066    addiu    a1, a1, 1
4067    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 /* result is stored from t0 */
4068
4069    addu     s2, s2, s3        /* vx += unit_x; */
4070    sw       t0, 0(a0)
4071    bnez     ra, 0b
4072     addiu   a0, a0, 4         /* (delay slot) advance dst */
4073
4074    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
40751:
4076    j        ra
4077     nop
4078
4079END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
4080
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
/*
 * Bilinear-scaled scanline, SRC operator: r5g6b5 source, a8 mask,
 * r5g6b5 destination.  Each loop iteration reads a 2x2 neighbourhood
 * (tl, tr from src_top; bl, br from src_bottom), blends it, applies one
 * mask byte, converts back to 565 and stores one destination pixel.
 *
 * a0     - *dst         (16-bit pixels, advanced by 2 per pixel)
 * a1     - *mask        (one byte per pixel)
 * a2     - *src_top     (16-bit pixels)
 * a3     - *src_bottom  (16-bit pixels)
 * 16(sp) - wt           (weight applied to the top row)
 * 20(sp) - wb           (weight applied to the bottom row)
 * 24(sp) - vx           (source x; high 16 bits index, low 16 bits fraction)
 * 28(sp) - unit_x       (added to vx after every pixel)
 * 32(sp) - w            (number of pixels; 0 returns immediately)
 *
 * Register roles inside the loop:
 *   s0, s1  - wt, wb shifted left by 2 * (8 - BILINEAR_INTERPOLATION_BITS)
 *   s2, s3  - vx, unit_x
 *   s4 - s7 - weights of tl, tr, bl, br
 *   v0      - 0x00ff00ff, constant operand of MIPS_UN8x4_MUL_UN8
 *   v1, s8  - 565 green / blue field masks for CONVERT_2x0565_TO_2x8888
 *   ra      - remaining pixel count (the real ra is saved and restored)
 *   t0 - t9 - pixel data and macro scratch
 */

    lw       t0, 32(sp)        /* t0 = w */
    beqz     t0, 1f            /* empty scanline: nothing to save or restore */
     nop

    /*
     * From here on the stack arguments are addressed 32 bytes higher
     * than documented above (wt at 48(sp) ... w at 64(sp)).
     */
    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra

    lw       s0, 48(sp)        /* s0 = wt */
    lw       s1, 52(sp)        /* s1 = wb */
    lw       s2, 56(sp)        /* s2 = vx */
    lw       s3, 60(sp)        /* s3 = unit_x */
    lw       ra, 64(sp)        /* ra = w (loop counter) */
    li       v0, 0x00ff00ff
    li       v1, 0x07e007e0    /* 565 green field in both halfwords */
    li       s8, 0x001f001f    /* 565 blue field in both halfwords */

    /* scale the row weights once, outside the loop */
    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx */
    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff */
    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS)
    li       t5, BILINEAR_INTERPOLATION_RANGE
    subu     t5, t5, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */

    mul      s4, s0, t5        /* s4 = wt * t5  (weight of tl) */
    mul      s5, s0, t4        /* s5 = wt * t4  (weight of tr) */
    mul      s6, s1, t5        /* s6 = wb * t5  (weight of bl) */
    mul      s7, s1, t4        /* s7 = wb * t4  (weight of br) */

    sra      t9, s2, 16        /* t9 = integer part of vx */
    sll      t9, t9, 1         /* t9 = byte offset of the left pixel */
    addiu    t8, t9, 2         /* t8 = byte offset of the right pixel */
    lhx      t0, t9(a2)        /* t0 = tl */
    lhx      t1, t8(a2)        /* t1 = tr */
    andi     t1, t1, 0xffff    /* lhx sign-extends; keep the low 16 bits */
    addiu    ra, ra, -1        /* one pixel fewer to go */
    lhx      t2, t9(a3)        /* t2 = bl */
    lhx      t3, t8(a3)        /* t3 = br */
    andi     t3, t3, 0xffff    /* lhx sign-extends; keep the low 16 bits */

    /*
     * Expand the four 565 pixels to 8888, two per macro call, in place.
     * NOTE(review): t0/t2 are passed without masking; presumably the macro
     * only consumes their low halfword - confirm in pixman-mips-dspr2-asm.h.
     */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    /* blend tl/tr/bl/br with weights s4-s7; the result is taken from t0 */
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu      t1, 0(a1)         /* t1 = mask */
    addiu    a1, a1, 1
    /* going by the macro name: t0 = t0 scaled by the 8-bit mask in t1 */
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
    /* pack t0 back to 565; the packed pixel is stored from t1 below */
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    addu     s2, s2, s3        /* vx += unit_x; */
    sh       t1, 0(a0)
    bnez     ra, 0b
     addiu   a0, a0, 2         /* delay slot: advance dst by one pixel */

    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
1:
    j        ra
     nop

END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
4152
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
/*
 * Bilinear-scaled scanline, OVER operator: a8r8g8b8 source, a8 mask,
 * a8r8g8b8 destination.  Each loop iteration reads a 2x2 neighbourhood
 * (tl, tr from src_top; bl, br from src_bottom), blends it, and combines
 * the result with one mask byte and the current destination pixel.
 *
 * a0     - dst        (a8r8g8b8, advanced by 4 per pixel)
 * a1     - mask       (a8, advanced by 1 per pixel)
 * a2     - src_top    (a8r8g8b8)
 * a3     - src_bottom (a8r8g8b8)
 * 16(sp) - wt         (weight applied to the top row)
 * 20(sp) - wb         (weight applied to the bottom row)
 * 24(sp) - vx         (source x; high 16 bits index, low 16 bits fraction)
 * 28(sp) - unit_x     (added to vx after every pixel)
 * 32(sp) - w          (number of pixels; 0 returns immediately)
 *
 * Register roles inside the loop:
 *   s0, s1  - wt, wb shifted left by 2 * (8 - BILINEAR_INTERPOLATION_BITS)
 *   s2, s3  - vx, unit_x
 *   s4 - s7 - weights of tl, tr, bl, br
 *   v0      - BILINEAR_INTERPOLATION_RANGE
 *   v1      - remaining pixel count
 *   s8      - 0x00ff00ff, constant operand of OVER_8888_8_8888
 *   t0 - t9 - pixel data and macro scratch
 */

    /*
     * Test w before touching the stack, like the other scanline routines
     * in this file; v1 keeps w across the save macro.
     */
    lw       v1, 32(sp)        /* v1 = w */
    beqz     v1, 1f            /* empty scanline: nothing to save or restore */
     nop

    /*
     * From here on the stack arguments are addressed 28 bytes higher
     * than documented above (wt at 44(sp) ... unit_x at 56(sp)).
     */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    lw       s0, 44(sp)        /* s0 = wt */
    lw       s1, 48(sp)        /* s1 = wb */
    lw       s2, 52(sp)        /* s2 = vx */
    lw       s3, 56(sp)        /* s3 = unit_x */
    li       v0, BILINEAR_INTERPOLATION_RANGE
    li       s8, 0x00ff00ff

    /* scale the row weights once, outside the loop */
    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))

0:
    /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx */
    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff */
    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS)
    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */

    mul      s4, s0, t5        /* s4 = wt * t5  (weight of tl) */
    mul      s5, s0, t4        /* s5 = wt * t4  (weight of tr) */
    mul      s6, s1, t5        /* s6 = wb * t5  (weight of bl) */
    mul      s7, s1, t4        /* s7 = wb * t4  (weight of br) */

    sra      t9, s2, 16        /* t9 = integer part of vx */
    sll      t9, t9, 2         /* t9 = byte offset of the left pixel */
    addiu    t8, t9, 4         /* t8 = byte offset of the right pixel */
    lwx      t0, t9(a2)        /* t0 = tl */
    lwx      t1, t8(a2)        /* t1 = tr */
    addiu    v1, v1, -1        /* one pixel fewer to go */
    lwx      t2, t9(a3)        /* t2 = bl */
    lwx      t3, t8(a3)        /* t3 = br */

    /* blend tl/tr/bl/br with weights s4-s7; the result is taken from t0 */
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \
                                      t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu      t1, 0(a1)         /* t1 = mask */
    lw       t2, 0(a0)         /* t2 = dst */
    addiu    a1, a1, 1
    /* going by the macro name: t0 = (t0 scaled by mask t1) OVER dst t2 */
    OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6

    addu     s2, s2, s3        /* vx += unit_x; */
    sw       t0, 0(a0)
    bnez     v1, 0b
     addiu   a0, a0, 4         /* delay slot: advance dst by one pixel */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j        ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
4219
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
/*
 * Bilinear-scaled scanline, ADD operator: 32-bit source, a8 mask,
 * 32-bit destination.  Each loop iteration reads a 2x2 neighbourhood
 * (tl, tr from src_top; bl, br from src_bottom), blends it, and combines
 * the result with one mask byte and the current destination pixel.
 *
 * a0     - *dst         (32-bit pixels, advanced by 4 per pixel)
 * a1     - *mask        (one byte per pixel)
 * a2     - *src_top     (32-bit pixels)
 * a3     - *src_bottom  (32-bit pixels)
 * 16(sp) - wt           (weight applied to the top row)
 * 20(sp) - wb           (weight applied to the bottom row)
 * 24(sp) - vx           (source x; high 16 bits index, low 16 bits fraction)
 * 28(sp) - unit_x       (added to vx after every pixel)
 * 32(sp) - w            (number of pixels; 0 returns immediately)
 *
 * Register roles inside the loop:
 *   s0, s1  - wt, wb shifted left by 2 * (8 - BILINEAR_INTERPOLATION_BITS)
 *   s2, s3  - vx, unit_x
 *   s4 - s7 - weights of tl, tr, bl, br
 *   v0      - BILINEAR_INTERPOLATION_RANGE
 *   v1      - remaining pixel count
 *   s8      - 0x00ff00ff, constant operand of MIPS_UN8x4_MUL_UN8_ADD_UN8x4
 *   t0 - t9 - pixel data and macro scratch
 */

    lw       v1, 32(sp)        /* v1 = w, kept as the loop counter */
    beqz     v1, 1f            /* empty scanline: nothing to save or restore */
     nop

    /*
     * From here on the stack arguments are addressed 28 bytes higher
     * than documented above (wt at 44(sp) ... unit_x at 56(sp)).
     */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    lw       s0, 44(sp)        /* s0 = wt */
    lw       s1, 48(sp)        /* s1 = wb */
    lw       s2, 52(sp)        /* s2 = vx */
    lw       s3, 56(sp)        /* s3 = unit_x */
    li       v0, BILINEAR_INTERPOLATION_RANGE
    li       s8, 0x00ff00ff

    /* scale the row weights once, outside the loop */
    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx */
    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff */
    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS)
    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */

    mul      s4, s0, t5        /* s4 = wt * t5  (weight of tl) */
    mul      s5, s0, t4        /* s5 = wt * t4  (weight of tr) */
    mul      s6, s1, t5        /* s6 = wb * t5  (weight of bl) */
    mul      s7, s1, t4        /* s7 = wb * t4  (weight of br) */

    sra      t9, s2, 16        /* t9 = integer part of vx */
    sll      t9, t9, 2         /* t9 = byte offset of the left pixel */
    addiu    t8, t9, 4         /* t8 = byte offset of the right pixel */
    lwx      t0, t9(a2)        /* t0 = tl */
    lwx      t1, t8(a2)        /* t1 = tr */
    addiu    v1, v1, -1        /* one pixel fewer to go */
    lwx      t2, t9(a3)        /* t2 = bl */
    lwx      t3, t8(a3)        /* t3 = br */

    /* blend tl/tr/bl/br with weights s4-s7; the result is taken from t0 */
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu      t1, 0(a1)         /* t1 = mask */
    lw       t2, 0(a0)         /* t2 = dst */
    addiu    a1, a1, 1
    /* going by the macro name: t0 = (t0 scaled by mask t1) + dst t2 */
    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5

    addu     s2, s2, s3        /* vx += unit_x; */
    sw       t0, 0(a0)
    bnez     v1, 0b
     addiu   a0, a0, 4         /* delay slot: advance dst by one pixel */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j        ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
4284