1/*
2 * Copyright 2014 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkBlitRow.h"
9#include "SkBlitMask.h"
10#include "SkColorPriv.h"
11#include "SkDither.h"
12#include "SkMathPriv.h"
13
14static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
15                                    const SkPMColor* SK_RESTRICT src, int count,
16                                    U8CPU alpha, int /*x*/, int /*y*/) {
17    register uint32_t t0, t1, t2, t3, t4, t5, t6;
18    register uint32_t s0, s1, s2, s4, s5, s6;
19
20    alpha += 1;
21    if (count >= 2) {
22        __asm__ volatile (
23           ".set             push                          \n\t"
24           ".set             noreorder                     \n\t"
25            "sll             %[s4],    %[alpha], 8         \n\t"
26            "or              %[s4],    %[s4],    %[alpha]  \n\t"
27            "repl.ph         %[s5],    0x1f                \n\t"
28            "repl.ph         %[s6],    0x3f                \n\t"
29        "1:                                                \n\t"
30            "lw              %[s2],    0(%[src])           \n\t"
31            "lw              %[s1],    4(%[src])           \n\t"
32            "lwr             %[s0],    0(%[dst])           \n\t"
33            "lwl             %[s0],    3(%[dst])           \n\t"
34            "and             %[t1],    %[s0],    %[s5]     \n\t"
35            "shra.ph         %[t0],    %[s0],    5         \n\t"
36            "and             %[t2],    %[t0],    %[s6]     \n\t"
37#ifdef __mips_dspr2
38            "shrl.ph         %[t3],    %[s0],    11        \n\t"
39#else
40            "shra.ph         %[t0],    %[s0],    11        \n\t"
41            "and             %[t3],    %[t0],    %[s5]     \n\t"
42#endif
43            "precrq.ph.w     %[t0],    %[s1],    %[s2]     \n\t"
44            "shrl.qb         %[t5],    %[t0],    3         \n\t"
45            "and             %[t4],    %[t5],    %[s5]     \n\t"
46            "ins             %[s2],    %[s1],    16, 16    \n\t"
47            "preceu.ph.qbra  %[t0],    %[s2]               \n\t"
48            "shrl.qb         %[t6],    %[t0],    3         \n\t"
49#ifdef __mips_dspr2
50            "shrl.ph         %[t5],    %[s2],    10        \n\t"
51#else
52            "shra.ph         %[t0],    %[s2],    10        \n\t"
53            "and             %[t5],    %[t0],    %[s6]     \n\t"
54#endif
55            "subu.qb         %[t4],    %[t4],    %[t1]     \n\t"
56            "subu.qb         %[t5],    %[t5],    %[t2]     \n\t"
57            "subu.qb         %[t6],    %[t6],    %[t3]     \n\t"
58            "muleu_s.ph.qbr  %[t4],    %[s4],    %[t4]     \n\t"
59            "muleu_s.ph.qbr  %[t5],    %[s4],    %[t5]     \n\t"
60            "muleu_s.ph.qbr  %[t6],    %[s4],    %[t6]     \n\t"
61            "addiu           %[count], %[count], -2        \n\t"
62            "addiu           %[src],   %[src],   8         \n\t"
63            "shra.ph         %[t4],    %[t4],    8         \n\t"
64            "shra.ph         %[t5],    %[t5],    8         \n\t"
65            "shra.ph         %[t6],    %[t6],    8         \n\t"
66            "addu.qb         %[t4],    %[t4],    %[t1]     \n\t"
67            "addu.qb         %[t5],    %[t5],    %[t2]     \n\t"
68            "addu.qb         %[t6],    %[t6],    %[t3]     \n\t"
69            "andi            %[s0],    %[t4],    0xffff    \n\t"
70            "andi            %[t0],    %[t5],    0xffff    \n\t"
71            "sll             %[t0],    %[t0],    0x5       \n\t"
72            "or              %[s0],    %[s0],    %[t0]     \n\t"
73            "sll             %[t0],    %[t6],    0xb       \n\t"
74            "or              %[t0],    %[t0],    %[s0]     \n\t"
75            "sh              %[t0],    0(%[dst])           \n\t"
76            "srl             %[s1],    %[t4],    16        \n\t"
77            "srl             %[t0],    %[t5],    16        \n\t"
78            "sll             %[t5],    %[t0],    5         \n\t"
79            "or              %[t0],    %[t5],    %[s1]     \n\t"
80            "srl             %[s0],    %[t6],    16        \n\t"
81            "sll             %[s2],    %[s0],    0xb       \n\t"
82            "or              %[s1],    %[s2],    %[t0]     \n\t"
83            "sh              %[s1],    2(%[dst])           \n\t"
84            "bge             %[count], 2,        1b        \n\t"
85            " addiu          %[dst],   %[dst],   4         \n\t"
86            ".set            pop                           \n\t"
87            : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
88              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
89              [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5),
90              [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst),
91              [src]"+r"(src)
92            : [alpha]"r"(alpha)
93            : "memory", "hi", "lo"
94        );
95    }
96
97    if (count == 1) {
98        SkPMColor c = *src++;
99        SkPMColorAssert(c);
100        SkASSERT(SkGetPackedA32(c) == 255);
101        uint16_t d = *dst;
102        *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha),
103                             SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha),
104                             SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha));
105    }
106}
107
108static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
109                                             const SkPMColor* __restrict__ src,
110                                             int count, U8CPU alpha, int x, int y) {
111    __asm__ volatile (
112        "pref  0,   0(%[src])     \n\t"
113        "pref  1,   0(%[dst])     \n\t"
114        "pref  0,   32(%[src])    \n\t"
115        "pref  1,   32(%[dst])    \n\t"
116        :
117        : [src]"r"(src), [dst]"r"(dst)
118        : "memory"
119    );
120
121    register int32_t t0, t1, t2, t3, t4, t5, t6;
122    register int32_t t7, t8, t9, s0, s1, s2, s3;
123    const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
124
125    if (count >= 2) {
126        __asm__ volatile (
127            ".set            push                                \n\t"
128            ".set            noreorder                           \n\t"
129            "li              %[s1],    0x01010101                \n\t"
130            "li              %[s2],    -2017                     \n\t"
131        "1:                                                      \n\t"
132            "bnez            %[s3],    4f                        \n\t"
133            " li             %[s3],    2                         \n\t"
134            "pref            0,        64(%[src])                \n\t"
135            "pref            1,        64(%[dst])                \n\t"
136        "4:                                                      \n\t"
137            "addiu           %[s3],    %[s3],    -1              \n\t"
138            "lw              %[t1],    0(%[src])                 \n\t"
139            "andi            %[t3],    %[x],     0x3             \n\t"
140            "addiu           %[x],     %[x],     1               \n\t"
141            "sll             %[t4],    %[t3],    2               \n\t"
142            "srav            %[t5],    %[dither_scan], %[t4]     \n\t"
143            "andi            %[t3],    %[t5],    0xf             \n\t"
144            "lw              %[t2],    4(%[src])                 \n\t"
145            "andi            %[t4],    %[x],     0x3             \n\t"
146            "sll             %[t5],    %[t4],    2               \n\t"
147            "srav            %[t6],    %[dither_scan], %[t5]     \n\t"
148            "addiu           %[x],     %[x],     1               \n\t"
149            "ins             %[t3],    %[t6],    8,    4         \n\t"
150            "srl             %[t4],    %[t1],    24              \n\t"
151            "addiu           %[t0],    %[t4],    1               \n\t"
152            "srl             %[t4],    %[t2],    24              \n\t"
153            "addiu           %[t5],    %[t4],    1               \n\t"
154            "ins             %[t0],    %[t5],    16,   16        \n\t"
155            "muleu_s.ph.qbr  %[t4],    %[t3],    %[t0]           \n\t"
156            "preceu.ph.qbla  %[t3],    %[t4]                     \n\t"
157            "andi            %[t4],    %[t1],    0xff            \n\t"
158            "ins             %[t4],    %[t2],    16,   8         \n\t"
159            "shrl.qb         %[t5],    %[t4],    5               \n\t"
160            "subu.qb         %[t6],    %[t3],    %[t5]           \n\t"
161            "addq.ph         %[t5],    %[t6],    %[t4]           \n\t"
162            "ext             %[t4],    %[t1],    8,    8         \n\t"
163            "srl             %[t6],    %[t2],    8               \n\t"
164            "ins             %[t4],    %[t6],    16,   8         \n\t"
165            "shrl.qb         %[t6],    %[t4],    6               \n\t"
166            "shrl.qb         %[t7],    %[t3],    1               \n\t"
167            "subu.qb         %[t8],    %[t7],    %[t6]           \n\t"
168            "addq.ph         %[t6],    %[t8],    %[t4]           \n\t"
169            "ext             %[t4],    %[t1],    16,   8         \n\t"
170            "srl             %[t7],    %[t2],    16              \n\t"
171            "ins             %[t4],    %[t7],    16,   8         \n\t"
172            "shrl.qb         %[t7],    %[t4],    5               \n\t"
173            "subu.qb         %[t8],    %[t3],    %[t7]           \n\t"
174            "addq.ph         %[t7],    %[t8],    %[t4]           \n\t"
175            "shll.ph         %[t4],    %[t7],    2               \n\t"
176            "andi            %[t9],    %[t4],    0xffff          \n\t"
177            "srl             %[s0],    %[t4],    16              \n\t"
178            "andi            %[t3],    %[t6],    0xffff          \n\t"
179            "srl             %[t4],    %[t6],    16              \n\t"
180            "andi            %[t6],    %[t5],    0xffff          \n\t"
181            "srl             %[t7],    %[t5],    16              \n\t"
182            "subq.ph         %[t5],    %[s1],    %[t0]           \n\t"
183            "srl             %[t0],    %[t5],    3               \n\t"
184            "beqz            %[t1],    3f                        \n\t"
185            " lhu            %[t5],    0(%[dst])                 \n\t"
186            "sll             %[t1],    %[t6],    13              \n\t"
187            "or              %[t8],    %[t9],    %[t1]           \n\t"
188            "sll             %[t1],    %[t3],    24              \n\t"
189            "or              %[t9],    %[t1],    %[t8]           \n\t"
190            "andi            %[t3],    %[t5],    0x7e0           \n\t"
191            "sll             %[t6],    %[t3],    0x10            \n\t"
192            "and             %[t8],    %[s2],    %[t5]           \n\t"
193            "or              %[t5],    %[t6],    %[t8]           \n\t"
194            "andi            %[t6],    %[t0],    0xff            \n\t"
195            "mul             %[t1],    %[t6],    %[t5]           \n\t"
196            "addu            %[t5],    %[t1],    %[t9]           \n\t"
197            "srl             %[t6],    %[t5],    5               \n\t"
198            "and             %[t5],    %[s2],    %[t6]           \n\t"
199            "srl             %[t8],    %[t6],    16              \n\t"
200            "andi            %[t6],    %[t8],    0x7e0           \n\t"
201            "or              %[t1],    %[t5],    %[t6]           \n\t"
202            "sh              %[t1],    0(%[dst])                 \n\t"
203        "3:                                                      \n\t"
204            "beqz            %[t2],    2f                        \n\t"
205            " lhu            %[t5],    2(%[dst])                 \n\t"
206            "sll             %[t1],    %[t7],    13              \n\t"
207            "or              %[t8],    %[s0],    %[t1]           \n\t"
208            "sll             %[t1],    %[t4],    24              \n\t"
209            "or              %[t9],    %[t1],    %[t8]           \n\t"
210            "andi            %[t3],    %[t5],    0x7e0           \n\t"
211            "sll             %[t6],    %[t3],    0x10            \n\t"
212            "and             %[t8],    %[s2],    %[t5]           \n\t"
213            "or              %[t5],    %[t6],    %[t8]           \n\t"
214            "srl             %[t6],    %[t0],    16              \n\t"
215            "mul             %[t1],    %[t6],    %[t5]           \n\t"
216            "addu            %[t5],    %[t1],    %[t9]           \n\t"
217            "srl             %[t6],    %[t5],    5               \n\t"
218            "and             %[t5],    %[s2],    %[t6]           \n\t"
219            "srl             %[t8],    %[t6],    16              \n\t"
220            "andi            %[t6],    %[t8],    0x7e0           \n\t"
221            "or              %[t1],    %[t5],    %[t6]           \n\t"
222            "sh              %[t1],    2(%[dst])                 \n\t"
223        "2:                                                      \n\t"
224            "addiu           %[count], %[count], -2              \n\t"
225            "addiu           %[src],   %[src],   8               \n\t"
226            "addiu           %[t1],    %[count], -1              \n\t"
227            "bgtz            %[t1],    1b                        \n\t"
228            " addiu          %[dst],  %[dst],    4               \n\t"
229            ".set            pop                                 \n\t"
230            : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x),
231              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
232              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7),
233              [t8]"=&r"(t8),  [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1),
234              [s2]"=&r"(s2), [s3]"=&r"(s3)
235            : [dither_scan]"r"(dither_scan)
236            : "memory", "hi", "lo"
237        );
238    }
239
240    if (count == 1) {
241        SkPMColor c = *src++;
242        SkPMColorAssert(c);
243        if (c) {
244            unsigned a = SkGetPackedA32(c);
245            int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
246
247            unsigned sr = SkGetPackedR32(c);
248            unsigned sg = SkGetPackedG32(c);
249            unsigned sb = SkGetPackedB32(c);
250            sr = SkDITHER_R32_FOR_565(sr, d);
251            sg = SkDITHER_G32_FOR_565(sg, d);
252            sb = SkDITHER_B32_FOR_565(sb, d);
253
254            uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
255            uint32_t dst_expanded = SkExpand_rgb_16(*dst);
256            dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
257            // now src and dst expanded are in g:11 r:10 x:1 b:10
258            *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
259        }
260        dst += 1;
261        DITHER_INC_X(x);
262    }
263}
264
265static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
266                                            const SkPMColor* __restrict__ src,
267                                            int count, U8CPU alpha, int x, int y) {
268    uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
269    register uint32_t t0, t1, t2, t3, t4, t5;
270    register uint32_t t6, t7, t8, t9, s0;
271    int dither[4];
272    int i;
273
274    for (i = 0; i < 4; i++, x++) {
275        dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
276    }
277
278    __asm__ volatile (
279        ".set            push                          \n\t"
280        ".set            noreorder                     \n\t"
281        "li              %[s0],    1                   \n\t"
282    "2:                                                \n\t"
283        "beqz            %[count], 1f                  \n\t"
284        " nop                                          \n\t"
285        "addiu           %[t0],    %[count], -1        \n\t"
286        "beqz            %[t0],    1f                  \n\t"
287        " nop                                          \n\t"
288        "beqz            %[s0],    3f                  \n\t"
289        " nop                                          \n\t"
290        "lw              %[t0],    0(%[dither])        \n\t"
291        "lw              %[t1],    4(%[dither])        \n\t"
292        "li              %[s0],    0                   \n\t"
293        "b               4f                            \n\t"
294        " nop                                          \n\t"
295    "3:                                                \n\t"
296        "lw              %[t0],    8(%[dither])        \n\t"
297        "lw              %[t1],    12(%[dither])       \n\t"
298        "li              %[s0],    1                   \n\t"
299    "4:                                                \n\t"
300        "sll             %[t2],    %[t0],    16        \n\t"
301        "or              %[t1],    %[t2],    %[t1]     \n\t"
302        "lw              %[t0],    0(%[src])           \n\t"
303        "lw              %[t2],    4(%[src])           \n\t"
304        "precrq.ph.w     %[t3],    %[t0],    %[t2]     \n\t"
305        "preceu.ph.qbra  %[t9],    %[t3]               \n\t"
306#ifdef __mips_dspr2
307        "append          %[t0],    %[t2],    16        \n\t"
308        "preceu.ph.qbra  %[t4],    %[t0]               \n\t"
309        "preceu.ph.qbla  %[t5],    %[t0]               \n\t"
310#else
311        "sll             %[t6],    %[t0],    16        \n\t"
312        "sll             %[t7],    %[t2],    16        \n\t"
313        "precrq.ph.w     %[t8],    %[t6],    %[t7]     \n\t"
314        "preceu.ph.qbra  %[t4],    %[t8]               \n\t"
315        "preceu.ph.qbla  %[t5],    %[t8]               \n\t"
316#endif
317        "addu.qb         %[t0],    %[t4],    %[t1]     \n\t"
318        "shra.ph         %[t2],    %[t4],    5         \n\t"
319        "subu.qb         %[t3],    %[t0],    %[t2]     \n\t"
320        "shra.ph         %[t6],    %[t3],    3         \n\t"
321        "addu.qb         %[t0],    %[t9],    %[t1]     \n\t"
322        "shra.ph         %[t2],    %[t9],    5         \n\t"
323        "subu.qb         %[t3],    %[t0],    %[t2]     \n\t"
324        "shra.ph         %[t7],    %[t3],    3         \n\t"
325        "shra.ph         %[t0],    %[t1],    1         \n\t"
326        "shra.ph         %[t2],    %[t5],    6         \n\t"
327        "addu.qb         %[t3],    %[t5],    %[t0]     \n\t"
328        "subu.qb         %[t4],    %[t3],    %[t2]     \n\t"
329        "shra.ph         %[t8],    %[t4],    2         \n\t"
330        "precrq.ph.w     %[t0],    %[t6],    %[t7]     \n\t"
331#ifdef __mips_dspr2
332        "append          %[t6],    %[t7],    16        \n\t"
333#else
334        "sll             %[t6],    %[t6],    16        \n\t"
335        "sll             %[t2],    %[t7],    16        \n\t"
336        "precrq.ph.w     %[t6],    %[t6],    %[t2]     \n\t"
337#endif
338        "sra             %[t4],    %[t8],    16        \n\t"
339        "andi            %[t5],    %[t8],    0xFF      \n\t"
340        "sll             %[t7],    %[t4],    5         \n\t"
341        "sra             %[t8],    %[t0],    5         \n\t"
342        "or              %[t9],    %[t7],    %[t8]     \n\t"
343        "or              %[t3],    %[t9],    %[t0]     \n\t"
344        "andi            %[t4],    %[t3],    0xFFFF    \n\t"
345        "sll             %[t7],    %[t5],    5         \n\t"
346        "sra             %[t8],    %[t6],    5         \n\t"
347        "or              %[t9],    %[t7],    %[t8]     \n\t"
348        "or              %[t3],    %[t9],    %[t6]     \n\t"
349        "and             %[t7],    %[t3],    0xFFFF    \n\t"
350        "sh              %[t4],    0(%[dst])           \n\t"
351        "sh              %[t7],    2(%[dst])           \n\t"
352        "addiu           %[count], %[count], -2        \n\t"
353        "addiu           %[src],   %[src],   8         \n\t"
354        "b               2b                            \n\t"
355        " addiu          %[dst],   %[dst],   4         \n\t"
356    "1:                                                \n\t"
357        ".set            pop                           \n\t"
358        : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
359          [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
360          [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
361          [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0)
362        : [dither] "r" (dither)
363        : "memory"
364    );
365
366    if (count == 1) {
367        SkPMColor c = *src++;
368        SkPMColorAssert(c); // only if DEBUG is turned on
369        SkASSERT(SkGetPackedA32(c) == 255);
370        unsigned dither = DITHER_VALUE(x);
371        *dst++ = SkDitherRGB32To565(c, dither);
372    }
373}
374
375static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst,
376                                           const SkPMColor* src,
377                                           int count, U8CPU alpha, int x, int y) {
378    register int32_t t0, t1, t2, t3, t4, t5, t6;
379    register int32_t s0, s1, s2, s3;
380    register int x1 = 0;
381    register uint32_t sc_mul;
382    register uint32_t sc_add;
383#ifdef ENABLE_DITHER_MATRIX_4X4
384    const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
385#else // ENABLE_DITHER_MATRIX_4X4
386    const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
387#endif // ENABLE_DITHER_MATRIX_4X4
388    int dither[4];
389
390    for (int i = 0; i < 4; i++) {
391        dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
392        x += 1;
393    }
394    alpha += 1;
395    __asm__ volatile (
396        ".set            push                              \n\t"
397        ".set            noreorder                         \n\t"
398        "li              %[t0],     0x100                  \n\t"
399        "subu            %[t0],     %[t0],     %[alpha]    \n\t"
400        "replv.ph        %[sc_mul], %[alpha]               \n\t"
401        "beqz            %[alpha],  1f                     \n\t"
402        " nop                                              \n\t"
403        "replv.qb        %[sc_add], %[t0]                  \n\t"
404        "b               2f                                \n\t"
405        " nop                                              \n\t"
406    "1:                                                    \n\t"
407        "replv.qb        %[sc_add], %[alpha]               \n\t"
408    "2:                                                    \n\t"
409        "addiu           %[t2],     %[count],  -1          \n\t"
410        "blez            %[t2],     3f                     \n\t"
411        " nop                                              \n\t"
412        "lw              %[s0],     0(%[src])              \n\t"
413        "lw              %[s1],     4(%[src])              \n\t"
414        "bnez            %[x1],     4f                     \n\t"
415        " nop                                              \n\t"
416        "lw              %[t0],     0(%[dither])           \n\t"
417        "lw              %[t1],     4(%[dither])           \n\t"
418        "li              %[x1],     1                      \n\t"
419        "b               5f                                \n\t"
420        " nop                                              \n\t"
421    "4:                                                    \n\t"
422        "lw              %[t0],     8(%[dither])           \n\t"
423        "lw              %[t1],     12(%[dither])          \n\t"
424        "li              %[x1],     0                      \n\t"
425    "5:                                                    \n\t"
426        "sll             %[t3],     %[t0],     7           \n\t"
427        "sll             %[t4],     %[t1],     7           \n\t"
428#ifdef __mips_dspr2
429        "append          %[t0],     %[t1],     16          \n\t"
430#else
431        "sll             %[t0],     %[t0],     8           \n\t"
432        "sll             %[t2],     %[t1],     8           \n\t"
433        "precrq.qb.ph    %[t0],     %[t0],     %[t2]       \n\t"
434#endif
435        "precrq.qb.ph    %[t1],     %[t3],     %[t4]       \n\t"
436        "sll             %[t5],     %[s0],     8           \n\t"
437        "sll             %[t6],     %[s1],     8           \n\t"
438        "precrq.qb.ph    %[t4],     %[t5],     %[t6]       \n\t"
439        "precrq.qb.ph    %[t6],     %[s0],     %[s1]       \n\t"
440        "preceu.ph.qbla  %[t5],     %[t4]                  \n\t"
441        "preceu.ph.qbra  %[t4],     %[t4]                  \n\t"
442        "preceu.ph.qbra  %[t6],     %[t6]                  \n\t"
443        "lh              %[t2],     0(%[dst])              \n\t"
444        "lh              %[s1],     2(%[dst])              \n\t"
445#ifdef __mips_dspr2
446        "append          %[t2],     %[s1],     16          \n\t"
447#else
448        "sll             %[s1],     %[s1],     16          \n\t"
449        "packrl.ph       %[t2],     %[t2],     %[s1]       \n\t"
450#endif
451        "shra.ph         %[s1],     %[t2],     11          \n\t"
452        "and             %[s1],     %[s1],     0x1F001F    \n\t"
453        "shra.ph         %[s2],     %[t2],     5           \n\t"
454        "and             %[s2],     %[s2],     0x3F003F    \n\t"
455        "and             %[s3],     %[t2],     0x1F001F    \n\t"
456        "shrl.qb         %[t3],     %[t4],     5           \n\t"
457        "addu.qb         %[t4],     %[t4],     %[t0]       \n\t"
458        "subu.qb         %[t4],     %[t4],     %[t3]       \n\t"
459        "shrl.qb         %[t4],     %[t4],     3           \n\t"
460        "shrl.qb         %[t3],     %[t5],     5           \n\t"
461        "addu.qb         %[t5],     %[t5],     %[t0]       \n\t"
462        "subu.qb         %[t5],     %[t5],     %[t3]       \n\t"
463        "shrl.qb         %[t5],     %[t5],     3           \n\t"
464        "shrl.qb         %[t3],     %[t6],     6           \n\t"
465        "addu.qb         %[t6],     %[t6],     %[t1]       \n\t"
466        "subu.qb         %[t6],     %[t6],     %[t3]       \n\t"
467        "shrl.qb         %[t6],     %[t6],     2           \n\t"
468        "cmpu.lt.qb      %[t4],     %[s1]                  \n\t"
469        "pick.qb         %[s0],     %[sc_add], $0          \n\t"
470        "addu.qb         %[s0],     %[s0],     %[s1]       \n\t"
471        "subu.qb         %[t4],     %[t4],     %[s1]       \n\t"
472        "muleu_s.ph.qbl  %[t0],     %[t4],     %[sc_mul]   \n\t"
473        "muleu_s.ph.qbr  %[t1],     %[t4],     %[sc_mul]   \n\t"
474        "precrq.qb.ph    %[t4],     %[t0],     %[t1]       \n\t"
475        "addu.qb         %[t4],     %[t4],     %[s0]       \n\t"
476        "cmpu.lt.qb      %[t5],     %[s3]                  \n\t"
477        "pick.qb         %[s0],     %[sc_add], $0          \n\t"
478        "addu.qb         %[s0],     %[s0],     %[s3]       \n\t"
479        "subu.qb         %[t5],     %[t5],     %[s3]       \n\t"
480        "muleu_s.ph.qbl  %[t0],     %[t5],     %[sc_mul]   \n\t"
481        "muleu_s.ph.qbr  %[t1],     %[t5],     %[sc_mul]   \n\t"
482        "precrq.qb.ph    %[t5],     %[t0],     %[t1]       \n\t"
483        "addu.qb         %[t5],     %[t5],     %[s0]       \n\t"
484        "cmpu.lt.qb      %[t6],     %[s2]                  \n\t"
485        "pick.qb         %[s0],     %[sc_add], $0          \n\t"
486        "addu.qb         %[s0],     %[s0],     %[s2]       \n\t"
487        "subu.qb         %[t6],     %[t6],     %[s2]       \n\t"
488        "muleu_s.ph.qbl  %[t0],     %[t6],     %[sc_mul]   \n\t"
489        "muleu_s.ph.qbr  %[t1],     %[t6],     %[sc_mul]   \n\t"
490        "precrq.qb.ph    %[t6],     %[t0],     %[t1]       \n\t"
491        "addu.qb         %[t6],     %[t6],     %[s0]       \n\t"
492        "shll.ph         %[s1],     %[t4],     11          \n\t"
493        "shll.ph         %[t0],     %[t6],     5           \n\t"
494        "or              %[s0],     %[s1],     %[t0]       \n\t"
495        "or              %[s1],     %[s0],     %[t5]       \n\t"
496        "srl             %[t2],     %[s1],     16          \n\t"
497        "and             %[t3],     %[s1],     0xFFFF      \n\t"
498        "sh              %[t2],     0(%[dst])              \n\t"
499        "sh              %[t3],     2(%[dst])              \n\t"
500        "addiu           %[src],    %[src],    8           \n\t"
501        "addi            %[count],  %[count],  -2          \n\t"
502        "b               2b                                \n\t"
503        " addu           %[dst],    %[dst],    4           \n\t"
504    "3:                                                    \n\t"
505        ".set            pop                               \n\t"
506        : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
507          [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add),
508          [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
509          [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
510          [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
511        : [dither]"r"(dither), [alpha]"r"(alpha)
512        : "memory", "hi", "lo"
513    );
514
515    if(count == 1) {
516        SkPMColor c = *src++;
517        SkPMColorAssert(c);
518        SkASSERT(SkGetPackedA32(c) == 255);
519        DITHER_565_SCAN(y);
520        int dither = DITHER_VALUE(x);
521        int sr = SkGetPackedR32(c);
522        int sg = SkGetPackedG32(c);
523        int sb = SkGetPackedB32(c);
524        sr = SkDITHER_R32To565(sr, dither);
525        sg = SkDITHER_G32To565(sg, dither);
526        sb = SkDITHER_B32To565(sb, dither);
527
528        uint16_t d = *dst;
529        *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha),
530                             SkAlphaBlend(sg, SkGetPackedG16(d), alpha),
531                             SkAlphaBlend(sb, SkGetPackedB16(d), alpha));
532        DITHER_INC_X(x);
533    }
534}
535
536static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst,
537                                      const SkPMColor* __restrict__ src,
538                                      int count, U8CPU alpha, int x, int y) {
539
540    __asm__ volatile (
541        "pref  0,  0(%[src])     \n\t"
542        "pref  1,  0(%[dst])     \n\t"
543        "pref  0,  32(%[src])    \n\t"
544        "pref  1,  32(%[dst])    \n\t"
545        :
546        : [src]"r"(src), [dst]"r"(dst)
547        : "memory"
548    );
549
550    register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
551    register uint32_t t16;
552    register uint32_t add_x10 = 0x100010;
553    register uint32_t add_x20 = 0x200020;
554    register uint32_t sa = 0xff00ff;
555
556    __asm__ volatile (
557        ".set           push                            \n\t"
558        ".set           noreorder                       \n\t"
559        "blez           %[count], 1f                    \n\t"
560        " nop                                           \n\t"
561    "2:                                                 \n\t"
562        "beqz           %[count], 1f                    \n\t"
563        " nop                                           \n\t"
564        "addiu          %[t0],    %[count], -1          \n\t"
565        "beqz           %[t0],    1f                    \n\t"
566        " nop                                           \n\t"
567        "bnez           %[t16],   3f                    \n\t"
568        " nop                                           \n\t"
569        "li             %[t16],   2                     \n\t"
570        "pref           0,        64(%[src])            \n\t"
571        "pref           1,        64(%[dst])            \n\t"
572    "3:                                                 \n\t"
573        "addiu          %[t16],   %[t16],   -1          \n\t"
574        "lw             %[t0],    0(%[src])             \n\t"
575        "lw             %[t1],    4(%[src])             \n\t"
576        "precrq.ph.w    %[t2],    %[t0],    %[t1]       \n\t"
577        "preceu.ph.qbra %[t8],    %[t2]                 \n\t"
578#ifdef __mips_dspr2
579        "append         %[t0],    %[t1],    16          \n\t"
580#else
581        "sll            %[t0],    %[t0],    16          \n\t"
582        "sll            %[t6],    %[t1],    16          \n\t"
583        "precrq.ph.w    %[t0],    %[t0],    %[t6]       \n\t"
584#endif
585        "preceu.ph.qbra %[t3],    %[t0]                 \n\t"
586        "preceu.ph.qbla %[t4],    %[t0]                 \n\t"
587        "preceu.ph.qbla %[t0],    %[t2]                 \n\t"
588        "subq.ph        %[t1],    %[sa],    %[t0]       \n\t"
589        "sra            %[t2],    %[t1],    8           \n\t"
590        "or             %[t5],    %[t2],    %[t1]       \n\t"
591        "replv.ph       %[t2],    %[t5]                 \n\t"
592        "lh             %[t0],    0(%[dst])             \n\t"
593        "lh             %[t1],    2(%[dst])             \n\t"
594        "and            %[t1],    %[t1],    0xffff      \n\t"
595#ifdef __mips_dspr2
596        "append         %[t0],    %[t1],    16          \n\t"
597#else
598        "sll            %[t5],    %[t0],    16          \n\t"
599        "or             %[t0],    %[t5],    %[t1]       \n\t"
600#endif
601        "and            %[t1],    %[t0],    0x1f001f    \n\t"
602        "shra.ph        %[t6],    %[t0],    11          \n\t"
603        "and            %[t6],    %[t6],    0x1f001f    \n\t"
604        "and            %[t7],    %[t0],    0x7e007e0   \n\t"
605        "shra.ph        %[t5],    %[t7],    5           \n\t"
606        "muleu_s.ph.qbl %[t0],    %[t2],    %[t6]       \n\t"
607        "addq.ph        %[t7],    %[t0],    %[add_x10]  \n\t"
608        "shra.ph        %[t6],    %[t7],    5           \n\t"
609        "addq.ph        %[t6],    %[t7],    %[t6]       \n\t"
610        "shra.ph        %[t0],    %[t6],    5           \n\t"
611        "addq.ph        %[t7],    %[t0],    %[t3]       \n\t"
612        "shra.ph        %[t6],    %[t7],    3           \n\t"
613        "muleu_s.ph.qbl %[t0],    %[t2],    %[t1]       \n\t"
614        "addq.ph        %[t7],    %[t0],    %[add_x10]  \n\t"
615        "shra.ph        %[t0],    %[t7],    5           \n\t"
616        "addq.ph        %[t7],    %[t7],    %[t0]       \n\t"
617        "shra.ph        %[t0],    %[t7],    5           \n\t"
618        "addq.ph        %[t7],    %[t0],    %[t8]       \n\t"
619        "shra.ph        %[t3],    %[t7],    3           \n\t"
620        "muleu_s.ph.qbl %[t0],    %[t2],    %[t5]       \n\t"
621        "addq.ph        %[t7],    %[t0],    %[add_x20]  \n\t"
622        "shra.ph        %[t0],    %[t7],    6           \n\t"
623        "addq.ph        %[t8],    %[t7],    %[t0]       \n\t"
624        "shra.ph        %[t0],    %[t8],    6           \n\t"
625        "addq.ph        %[t7],    %[t0],    %[t4]       \n\t"
626        "shra.ph        %[t8],    %[t7],    2           \n\t"
627        "shll.ph        %[t0],    %[t8],    5           \n\t"
628        "shll.ph        %[t1],    %[t6],    11          \n\t"
629        "or             %[t2],    %[t0],    %[t1]       \n\t"
630        "or             %[t3],    %[t2],    %[t3]       \n\t"
631        "sra            %[t4],    %[t3],    16          \n\t"
632        "sh             %[t4],    0(%[dst])             \n\t"
633        "sh             %[t3],    2(%[dst])             \n\t"
634        "addiu          %[count], %[count], -2          \n\t"
635        "addiu          %[src],   %[src],   8           \n\t"
636        "b              2b                              \n\t"
637        " addiu         %[dst],   %[dst],   4           \n\t"
638    "1:                                                 \n\t"
639        ".set           pop                             \n\t"
640        : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
641          [t16]"=&r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
642          [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
643          [t7]"=&r"(t7), [t8]"=&r"(t8)
644        : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa)
645        : "memory", "hi", "lo"
646    );
647
648    if (count == 1) {
649        SkPMColor c = *src++;
650        SkPMColorAssert(c);
651        if (c) {
652            *dst = SkSrcOver32To16(c, *dst);
653        }
654        dst += 1;
655    }
656}
657
658static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
659                                     const SkPMColor* SK_RESTRICT src, int count,
660                                     U8CPU alpha, int /*x*/, int /*y*/) {
661    register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
662    register uint32_t  s0, s1, s2, s3;
663    register unsigned dst_scale = 0;
664
665    __asm__ volatile (
666        ".set            push                                       \n\t"
667        ".set            noreorder                                  \n\t"
668        "replv.qb        %[t0],        %[alpha]                     \n\t"
669        "repl.ph         %[t6],        0x80                         \n\t"
670        "repl.ph         %[t7],        0xFF                         \n\t"
671    "1:                                                             \n\t"
672        "addiu           %[t8],        %[count],     -1             \n\t"
673        "blez            %[t8],        2f                           \n\t"
674        " nop                                                       \n\t"
675        "lw              %[t8],        0(%[src])                    \n\t"
676        "lw              %[t9],        4(%[src])                    \n\t"
677        "lh              %[t4],        0(%[dst])                    \n\t"
678        "lh              %[t5],        2(%[dst])                    \n\t"
679        "sll             %[t5],        %[t5],        16             \n\t"
680        "sll             %[t2],        %[t8],        8              \n\t"
681        "sll             %[t3],        %[t9],        8              \n\t"
682        "precrq.qb.ph    %[t1],        %[t2],        %[t3]          \n\t"
683        "precrq.qb.ph    %[t3],        %[t8],        %[t9]          \n\t"
684        "preceu.ph.qbla  %[t8],        %[t3]                        \n\t"
685        "muleu_s.ph.qbr  %[s3],        %[t0],        %[t8]          \n\t"
686        "preceu.ph.qbla  %[t2],        %[t1]                        \n\t"
687        "preceu.ph.qbra  %[t1],        %[t1]                        \n\t"
688        "preceu.ph.qbra  %[t3],        %[t3]                        \n\t"
689        "packrl.ph       %[t9],        %[t4],        %[t5]          \n\t"
690        "shra.ph         %[s0],        %[t9],        11             \n\t"
691        "and             %[s0],        %[s0],        0x1F001F       \n\t"
692        "shra.ph         %[s1],        %[t9],        5              \n\t"
693        "and             %[s1],        %[s1],        0x3F003F       \n\t"
694        "and             %[s2],        %[t9],        0x1F001F       \n\t"
695        "addq.ph         %[s3],        %[s3],        %[t6]          \n\t"
696        "shra.ph         %[t5],        %[s3],        8              \n\t"
697        "and             %[t5],        %[t5],        0xFF00FF       \n\t"
698        "addq.ph         %[dst_scale], %[s3],        %[t5]          \n\t"
699        "shra.ph         %[dst_scale], %[dst_scale], 8              \n\t"
700        "subq_s.ph       %[dst_scale], %[t7],        %[dst_scale]   \n\t"
701        "sll             %[dst_scale], %[dst_scale], 8              \n\t"
702        "precrq.qb.ph    %[dst_scale], %[dst_scale], %[dst_scale]   \n\t"
703        "shrl.qb         %[t1],        %[t1],        3              \n\t"
704        "shrl.qb         %[t2],        %[t2],        3              \n\t"
705        "shrl.qb         %[t3],        %[t3],        2              \n\t"
706        "muleu_s.ph.qbl  %[t1],        %[t0],        %[t1]          \n\t"
707        "muleu_s.ph.qbl  %[t2],        %[t0],        %[t2]          \n\t"
708        "muleu_s.ph.qbl  %[t3],        %[t0],        %[t3]          \n\t"
709        "muleu_s.ph.qbl  %[t8],        %[dst_scale], %[s0]          \n\t"
710        "muleu_s.ph.qbl  %[t9],        %[dst_scale], %[s2]          \n\t"
711        "muleu_s.ph.qbl  %[t4],        %[dst_scale], %[s1]          \n\t"
712        "addq.ph         %[t1],        %[t1],        %[t8]          \n\t"
713        "addq.ph         %[t2],        %[t2],        %[t9]          \n\t"
714        "addq.ph         %[t3],        %[t3],        %[t4]          \n\t"
715        "addq.ph         %[t8],        %[t1],        %[t6]          \n\t"
716        "addq.ph         %[t9],        %[t2],        %[t6]          \n\t"
717        "addq.ph         %[t4],        %[t3],        %[t6]          \n\t"
718        "shra.ph         %[t1],        %[t8],        8              \n\t"
719        "addq.ph         %[t1],        %[t1],        %[t8]          \n\t"
720        "preceu.ph.qbla  %[t1],        %[t1]                        \n\t"
721        "shra.ph         %[t2],        %[t9],        8              \n\t"
722        "addq.ph         %[t2],        %[t2],        %[t9]          \n\t"
723        "preceu.ph.qbla  %[t2],        %[t2]                        \n\t"
724        "shra.ph         %[t3],        %[t4],        8              \n\t"
725        "addq.ph         %[t3],        %[t3],        %[t4]          \n\t"
726        "preceu.ph.qbla  %[t3],        %[t3]                        \n\t"
727        "shll.ph         %[t8],        %[t1],        11             \n\t"
728        "shll.ph         %[t9],        %[t3],        5              \n\t"
729        "or              %[t8],        %[t8],        %[t9]          \n\t"
730        "or              %[s0],        %[t8],        %[t2]          \n\t"
731        "srl             %[t8],        %[s0],        16             \n\t"
732        "and             %[t9],        %[s0],        0xFFFF         \n\t"
733        "sh              %[t8],        0(%[dst])                    \n\t"
734        "sh              %[t9],        2(%[dst])                    \n\t"
735        "addiu           %[src],       %[src],       8              \n\t"
736        "addiu           %[count],     %[count],     -2             \n\t"
737        "b               1b                                         \n\t"
738        " addiu          %[dst],       %[dst],       4              \n\t"
739    "2:                                                             \n\t"
740        ".set            pop                                        \n\t"
741        : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
742          [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1),
743          [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1),
744          [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5),
745          [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9)
746        : [alpha]"r"(alpha)
747        : "memory", "hi", "lo"
748    );
749
750    if (count == 1) {
751        SkPMColor sc = *src++;
752        SkPMColorAssert(sc);
753        if (sc) {
754            uint16_t dc = *dst;
755            unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha);
756            unsigned dr = (SkPacked32ToR16(sc) * alpha) + (SkGetPackedR16(dc) * dst_scale);
757            unsigned dg = (SkPacked32ToG16(sc) * alpha) + (SkGetPackedG16(dc) * dst_scale);
758            unsigned db = (SkPacked32ToB16(sc) * alpha) + (SkGetPackedB16(dc) * dst_scale);
759            *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db));
760        }
761        dst += 1;
762    }
763}
764
765static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst,
766                                         const SkPMColor* SK_RESTRICT src,
767                                         int count, U8CPU alpha) {
768    register int32_t t0, t1, t2, t3, t4, t5, t6, t7;
769
770    __asm__ volatile (
771        ".set            push                         \n\t"
772        ".set            noreorder                    \n\t"
773        "li              %[t2],    0x100              \n\t"
774        "addiu           %[t0],    %[alpha], 1        \n\t"
775        "subu            %[t1],    %[t2],    %[t0]    \n\t"
776        "replv.qb        %[t7],    %[t0]              \n\t"
777        "replv.qb        %[t6],    %[t1]              \n\t"
778    "1:                                               \n\t"
779        "blez            %[count], 2f                 \n\t"
780        "lw              %[t0],    0(%[src])          \n\t"
781        "lw              %[t1],    0(%[dst])          \n\t"
782        "preceu.ph.qbr   %[t2],    %[t0]              \n\t"
783        "preceu.ph.qbl   %[t3],    %[t0]              \n\t"
784        "preceu.ph.qbr   %[t4],    %[t1]              \n\t"
785        "preceu.ph.qbl   %[t5],    %[t1]              \n\t"
786        "muleu_s.ph.qbr  %[t2],    %[t7],    %[t2]    \n\t"
787        "muleu_s.ph.qbr  %[t3],    %[t7],    %[t3]    \n\t"
788        "muleu_s.ph.qbr  %[t4],    %[t6],    %[t4]    \n\t"
789        "muleu_s.ph.qbr  %[t5],    %[t6],    %[t5]    \n\t"
790        "addiu           %[src],   %[src],   4        \n\t"
791        "addiu           %[count], %[count], -1       \n\t"
792        "precrq.qb.ph    %[t0],    %[t3],    %[t2]    \n\t"
793        "precrq.qb.ph    %[t2],    %[t5],    %[t4]    \n\t"
794        "addu            %[t1],    %[t0],    %[t2]    \n\t"
795        "sw              %[t1],    0(%[dst])          \n\t"
796        "b               1b                           \n\t"
797        " addi           %[dst],   %[dst],   4        \n\t"
798    "2:                                               \n\t"
799        ".set            pop                          \n\t"
800        : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
801          [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
802          [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
803        : [alpha]"r"(alpha)
804        : "memory", "hi", "lo"
805    );
806}
807
808void blitmask_d565_opaque_mips(int width, int height, uint16_t* device,
809                               unsigned deviceRB, const uint8_t* alpha,
810                               uint32_t expanded32, unsigned maskRB) {
811    register uint32_t s0, s1, s2, s3;
812
813    __asm__ volatile (
814        ".set            push                                    \n\t"
815        ".set            noreorder                               \n\t"
816        ".set            noat                                    \n\t"
817        "li              $t9,       0x7E0F81F                    \n\t"
818    "1:                                                          \n\t"
819        "move            $t8,       %[width]                     \n\t"
820        "addiu           %[height], %[height],     -1            \n\t"
821    "2:                                                          \n\t"
822        "beqz            $t8,       4f                           \n\t"
823        " addiu          $t0,       $t8,           -4            \n\t"
824        "bltz            $t0,       3f                           \n\t"
825        " nop                                                    \n\t"
826        "addiu           $t8,       $t8,           -4            \n\t"
827        "lhu             $t0,       0(%[device])                 \n\t"
828        "lhu             $t1,       2(%[device])                 \n\t"
829        "lhu             $t2,       4(%[device])                 \n\t"
830        "lhu             $t3,       6(%[device])                 \n\t"
831        "lbu             $t4,       0(%[alpha])                  \n\t"
832        "lbu             $t5,       1(%[alpha])                  \n\t"
833        "lbu             $t6,       2(%[alpha])                  \n\t"
834        "lbu             $t7,       3(%[alpha])                  \n\t"
835        "replv.ph        $t0,       $t0                          \n\t"
836        "replv.ph        $t1,       $t1                          \n\t"
837        "replv.ph        $t2,       $t2                          \n\t"
838        "replv.ph        $t3,       $t3                          \n\t"
839        "addiu           %[s0],     $t4,           1             \n\t"
840        "addiu           %[s1],     $t5,           1             \n\t"
841        "addiu           %[s2],     $t6,           1             \n\t"
842        "addiu           %[s3],     $t7,           1             \n\t"
843        "srl             %[s0],     %[s0],         3             \n\t"
844        "srl             %[s1],     %[s1],         3             \n\t"
845        "srl             %[s2],     %[s2],         3             \n\t"
846        "srl             %[s3],     %[s3],         3             \n\t"
847        "and             $t0,       $t0,           $t9           \n\t"
848        "and             $t1,       $t1,           $t9           \n\t"
849        "and             $t2,       $t2,           $t9           \n\t"
850        "and             $t3,       $t3,           $t9           \n\t"
851        "subu            $t4,       %[expanded32], $t0           \n\t"
852        "subu            $t5,       %[expanded32], $t1           \n\t"
853        "subu            $t6,       %[expanded32], $t2           \n\t"
854        "subu            $t7,       %[expanded32], $t3           \n\t"
855        "mul             $t4,       $t4,           %[s0]         \n\t"
856        "mul             $t5,       $t5,           %[s1]         \n\t"
857        "mul             $t6,       $t6,           %[s2]         \n\t"
858        "mul             $t7,       $t7,           %[s3]         \n\t"
859        "addiu           %[alpha],  %[alpha],      4             \n\t"
860        "srl             $t4,       $t4,           5             \n\t"
861        "srl             $t5,       $t5,           5             \n\t"
862        "srl             $t6,       $t6,           5             \n\t"
863        "srl             $t7,       $t7,           5             \n\t"
864        "addu            $t4,       $t0,           $t4           \n\t"
865        "addu            $t5,       $t1,           $t5           \n\t"
866        "addu            $t6,       $t2,           $t6           \n\t"
867        "addu            $t7,       $t3,           $t7           \n\t"
868        "and             $t4,       $t4,           $t9           \n\t"
869        "and             $t5,       $t5,           $t9           \n\t"
870        "and             $t6,       $t6,           $t9           \n\t"
871        "and             $t7,       $t7,           $t9           \n\t"
872        "srl             $t0,       $t4,           16            \n\t"
873        "srl             $t1,       $t5,           16            \n\t"
874        "srl             $t2,       $t6,           16            \n\t"
875        "srl             $t3,       $t7,           16            \n\t"
876        "or              %[s0],     $t0,           $t4           \n\t"
877        "or              %[s1],     $t1,           $t5           \n\t"
878        "or              %[s2],     $t2,           $t6           \n\t"
879        "or              %[s3],     $t3,           $t7           \n\t"
880        "sh              %[s0],     0(%[device])                 \n\t"
881        "sh              %[s1],     2(%[device])                 \n\t"
882        "sh              %[s2],     4(%[device])                 \n\t"
883        "sh              %[s3],     6(%[device])                 \n\t"
884        "b               2b                                      \n\t"
885        " addiu          %[device], %[device],     8             \n\t"
886    "3:                                                          \n\t"
887        "lhu             $t0,       0(%[device])                 \n\t"
888        "lbu             $t1,       0(%[alpha])                  \n\t"
889        "addiu           $t8,       $t8,           -1            \n\t"
890        "replv.ph        $t2,       $t0                          \n\t"
891        "and             $t2,       $t2,           $t9           \n\t"
892        "addiu           $t0,       $t1,           1             \n\t"
893        "srl             $t0,       $t0,           3             \n\t"
894        "subu            $t3,       %[expanded32], $t2           \n\t"
895        "mul             $t3,       $t3,           $t0           \n\t"
896        "addiu           %[alpha],  %[alpha],      1             \n\t"
897        "srl             $t3,       $t3,           5             \n\t"
898        "addu            $t3,       $t2,           $t3           \n\t"
899        "and             $t3,       $t3,           $t9           \n\t"
900        "srl             $t4,       $t3,           16            \n\t"
901        "or              %[s0],     $t4,           $t3           \n\t"
902        "sh              %[s0],     0(%[device])                 \n\t"
903        "bnez            $t8,       3b                           \n\t"
904         "addiu          %[device], %[device],     2             \n\t"
905    "4:                                                          \n\t"
906        "addu            %[device], %[device],     %[deviceRB]   \n\t"
907        "bgtz            %[height], 1b                           \n\t"
908        " addu           %[alpha],  %[alpha],      %[maskRB]     \n\t"
909        ".set            pop                                     \n\t"
910        : [height]"+r"(height), [alpha]"+r"(alpha), [device]"+r"(device),
911          [deviceRB]"+r"(deviceRB), [maskRB]"+r"(maskRB), [s0]"=&r"(s0),
912          [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
913        : [expanded32] "r" (expanded32), [width] "r" (width)
914        : "memory", "hi", "lo", "t0", "t1", "t2", "t3",
915          "t4", "t5", "t6", "t7", "t8", "t9"
916    );
917}
918
919///////////////////////////////////////////////////////////////////////////////////////////////////
920
// MIPS DSP row procs for 565 destinations, indexed by the `flags` value passed
// to SkBlitRow::PlatformFactory565. A nullptr slot means this file supplies no
// routine for that flag combination.
// NOTE(review): the slot order appears to be Opaque, Blend, SrcAlpha-Opaque,
// SrcAlpha-Blend, first without and then with dither - confirm against the
// SkBlitRow flag definitions.
const SkBlitRow::Proc16 platform_565_procs_mips_dsp[] = {
    // no dither
    nullptr,
    S32_D565_Blend_mips_dsp,
    S32A_D565_Opaque_mips_dsp,
    S32A_D565_Blend_mips_dsp,

    // dither
    S32_D565_Opaque_Dither_mips_dsp,
    S32_D565_Blend_Dither_mips_dsp,
    S32A_D565_Opaque_Dither_mips_dsp,
    nullptr,
};
934
// MIPS DSP row procs for 32-bit destinations, indexed by the `flags` value
// passed to SkBlitRow::PlatformProcs32. Only the S32_Blend slot is filled; a
// nullptr slot means this file supplies no routine for that case.
static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = {
    nullptr,   // S32_Opaque,
    S32_Blend_BlitRow32_mips_dsp,   // S32_Blend,
    nullptr,   // S32A_Opaque,
    nullptr,   // S32A_Blend,
};
941
942SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
943    return platform_565_procs_mips_dsp[flags];
944}
945
// This file provides no MIPS DSP color procs for 565 destinations: the result
// is nullptr for every `flags` value.
SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
    return nullptr;
}
949
950SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
951    return platform_32_procs_mips_dsp[flags];
952}
953