1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Image transforms and color space conversion methods for lossless decoder.
11//
12// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
13//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15#include "./dsp.h"
16
17#if defined(WEBP_USE_MIPS_DSP_R2)
18
19#include "./lossless.h"
20
// Generates a static color-map lookup function named FUNC_NAME that operates
// on elements of type TYPE. TYPE is spelled into the assembly text and the
// assembler '.ifc' directives compare it against "uint8_t" and "uint32_t", so
// only the instruction groups matching the chosen TYPE are assembled.
//
// For each row y in [y_start, y_end), 'width' elements are processed:
//  - four elements per iteration in inline assembly ('width >> 2' iterations),
//  - then the remaining 'width & 3' elements in C through GET_INDEX/GET_VALUE.
// 'src' and 'dst' are only ever advanced element by element; no per-row
// stride is applied between rows.
//
// Assembly path, per element:
//  - uint8_t:  the loaded byte is the index; bits 8..15 of the looked-up
//              color_map entry are stored as a single byte.
//  - uint32_t: bits 8..15 of the loaded word are the index; the complete
//              looked-up color_map entry is stored as a word.
// In both cases the index is shifted left by 2 (turning it into a byte offset
// for 4-byte entries) before the indexed 'lwx' load from color_map.
21#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
22static void FUNC_NAME(const TYPE* src,                                         \
23                      const uint32_t* const color_map,                         \
24                      TYPE* dst, int y_start, int y_end,                       \
25                      int width) {                                             \
26  int y;                                                                       \
27  for (y = y_start; y < y_end; ++y) {                                          \
28    int x;                                                                     \
29    for (x = 0; x < (width >> 2); ++x) {                                       \
30      int tmp1, tmp2, tmp3, tmp4;                                              \
31      __asm__ volatile (                                                       \
32      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
33        "lbu       %[tmp1],  0(%[src])                  \n\t"                  \
34        "lbu       %[tmp2],  1(%[src])                  \n\t"                  \
35        "lbu       %[tmp3],  2(%[src])                  \n\t"                  \
36        "lbu       %[tmp4],  3(%[src])                  \n\t"                  \
37        "addiu     %[src],   %[src],      4             \n\t"                  \
38      ".endif                                           \n\t"                  \
39      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
40        "lw        %[tmp1],  0(%[src])                  \n\t"                  \
41        "lw        %[tmp2],  4(%[src])                  \n\t"                  \
42        "lw        %[tmp3],  8(%[src])                  \n\t"                  \
43        "lw        %[tmp4],  12(%[src])                 \n\t"                  \
44        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
45        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
46        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
47        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
48        "addiu     %[src],   %[src],      16            \n\t"                  \
49      ".endif                                           \n\t"                  \
50        "sll       %[tmp1],  %[tmp1],     2             \n\t"                  \
51        "sll       %[tmp2],  %[tmp2],     2             \n\t"                  \
52        "sll       %[tmp3],  %[tmp3],     2             \n\t"                  \
53        "sll       %[tmp4],  %[tmp4],     2             \n\t"                  \
54        "lwx       %[tmp1],  %[tmp1](%[color_map])      \n\t"                  \
55        "lwx       %[tmp2],  %[tmp2](%[color_map])      \n\t"                  \
56        "lwx       %[tmp3],  %[tmp3](%[color_map])      \n\t"                  \
57        "lwx       %[tmp4],  %[tmp4](%[color_map])      \n\t"                  \
58      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
59        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
60        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
61        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
62        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
63        "sb        %[tmp1],  0(%[dst])                  \n\t"                  \
64        "sb        %[tmp2],  1(%[dst])                  \n\t"                  \
65        "sb        %[tmp3],  2(%[dst])                  \n\t"                  \
66        "sb        %[tmp4],  3(%[dst])                  \n\t"                  \
67        "addiu     %[dst],   %[dst],      4             \n\t"                  \
68      ".endif                                           \n\t"                  \
69      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
70        "sw        %[tmp1],  0(%[dst])                  \n\t"                  \
71        "sw        %[tmp2],  4(%[dst])                  \n\t"                  \
72        "sw        %[tmp3],  8(%[dst])                  \n\t"                  \
73        "sw        %[tmp4],  12(%[dst])                 \n\t"                  \
74        "addiu     %[dst],   %[dst],      16            \n\t"                  \
75      ".endif                                           \n\t"                  \
76        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
77          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
78        : [color_map]"r"(color_map)                                            \
79        : "memory"                                                             \
80      );                                                                       \
81    }                                                                          \
82    for (x = 0; x < (width & 3); ++x) {                                        \
83      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
84    }                                                                          \
85  }                                                                            \
86}
87
// Instantiate the word-element (MapARGB) and byte-element (MapAlpha) variants.
88MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
89MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
90
// The generator macro is not needed past this point.
91#undef MAP_COLOR_FUNCS
92
// Computes, independently for each of the four 8-bit channels packed in the
// 32-bit arguments, the value c0 + c1 - c2 clamped to [0, 255], and returns
// the four results repacked into a single 32-bit value.
//
// Each channel is first widened to a 16-bit lane (two lanes per register:
// 'qbr' takes the two low bytes, 'qbl' the two high bytes), so the
// intermediate sum/difference (range [-255, 510]) cannot wrap. The saturating
// shift left by 7 followed by the saturating 'precrqu_s.qb.ph' pack performs
// the clamp while narrowing the lanes back to bytes.
//
// temp0 is the only output without the early-clobber modifier: it is first
// written by the last instruction that still reads an input operand (c2).
93static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
94                                                   uint32_t c2) {
95  int temp0, temp1, temp2, temp3, temp4, temp5;
96  __asm__ volatile (
    // Widen c0 -> temp1/temp2, c1 -> temp3/temp4, c2 -> temp5/temp0.
97    "preceu.ph.qbr   %[temp1],   %[c0]                 \n\t"
98    "preceu.ph.qbl   %[temp2],   %[c0]                 \n\t"
99    "preceu.ph.qbr   %[temp3],   %[c1]                 \n\t"
100    "preceu.ph.qbl   %[temp4],   %[c1]                 \n\t"
101    "preceu.ph.qbr   %[temp5],   %[c2]                 \n\t"
102    "preceu.ph.qbl   %[temp0],   %[c2]                 \n\t"
    // c1 - c2, then c0 + (c1 - c2), per 16-bit lane.
103    "subq.ph         %[temp3],   %[temp3],   %[temp5]  \n\t"
104    "subq.ph         %[temp4],   %[temp4],   %[temp0]  \n\t"
105    "addq.ph         %[temp1],   %[temp1],   %[temp3]  \n\t"
106    "addq.ph         %[temp2],   %[temp2],   %[temp4]  \n\t"
    // Saturate and narrow back to four bytes (high lanes from temp2).
107    "shll_s.ph       %[temp1],   %[temp1],   7         \n\t"
108    "shll_s.ph       %[temp2],   %[temp2],   7         \n\t"
109    "precrqu_s.qb.ph %[temp2],   %[temp2],   %[temp1]  \n\t"
110    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
111      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
112    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
113    : "memory"
114  );
115  return temp2;
116}
117
// Computes, independently for each of the four 8-bit channels packed in the
// 32-bit arguments:
//   avg    = (c0 + c1) >> 1                      (adduh.qb)
//   result = clamp(avg + (avg - c2) / 2, 0, 255)
// and returns the four results repacked into a single 32-bit value.
//
// The halving of (avg - c2) rounds toward zero: the sign bit of each 16-bit
// difference (extracted with a logical shift right by 15) is added back
// before the arithmetic shift right by 1. As in the "full" variant, the
// saturating shift left by 7 plus the saturating 'precrqu_s.qb.ph' pack does
// the clamp while narrowing the 16-bit lanes back to bytes.
//
// temp0 and temp4 carry no early-clobber modifier: temp4 is first written by
// the last instruction that reads an input operand (c2), and temp0 is first
// written after that point.
118static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
119                                                   uint32_t c2) {
120  int temp0, temp1, temp2, temp3, temp4, temp5;
121  __asm__ volatile (
    // temp5 = per-byte average of c0 and c1; widen it and c2 to 16-bit lanes.
122    "adduh.qb         %[temp5],   %[c0],      %[c1]       \n\t"
123    "preceu.ph.qbr    %[temp3],   %[c2]                   \n\t"
124    "preceu.ph.qbr    %[temp1],   %[temp5]                \n\t"
125    "preceu.ph.qbl    %[temp2],   %[temp5]                \n\t"
126    "preceu.ph.qbl    %[temp4],   %[c2]                   \n\t"
    // temp3/temp4 = avg - c2, then divided by two rounding toward zero.
127    "subq.ph          %[temp3],   %[temp1],   %[temp3]    \n\t"
128    "subq.ph          %[temp4],   %[temp2],   %[temp4]    \n\t"
129    "shrl.ph          %[temp5],   %[temp3],   15          \n\t"
130    "shrl.ph          %[temp0],   %[temp4],   15          \n\t"
131    "addq.ph          %[temp3],   %[temp3],   %[temp5]    \n\t"
132    "addq.ph          %[temp4],   %[temp0],   %[temp4]    \n\t"
133    "shra.ph          %[temp3],   %[temp3],   1           \n\t"
134    "shra.ph          %[temp4],   %[temp4],   1           \n\t"
    // avg + half difference, then saturate and narrow back to four bytes.
135    "addq.ph          %[temp1],   %[temp1],   %[temp3]    \n\t"
136    "addq.ph          %[temp2],   %[temp2],   %[temp4]    \n\t"
137    "shll_s.ph        %[temp1],   %[temp1],   7           \n\t"
138    "shll_s.ph        %[temp2],   %[temp2],   7           \n\t"
139    "precrqu_s.qb.ph  %[temp1],   %[temp2],   %[temp1]    \n\t"
140    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
141      [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
142    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
143    : "memory"
144  );
145  return temp1;
146}
147
// Returns either 'a' or 'b', chosen by comparing their per-byte distances to
// 'c'. With
//   Sbc = sum over the four bytes of |b - c|
//   Sac = sum over the four bytes of |a - c|
// the function returns 'a' when Sbc - Sac <= 0 and 'b' otherwise.
//
// Each absolute difference is formed as max - min: 'cmpgdu.lt.qb' records the
// per-byte comparison result and the two 'pick.qb' instructions that follow
// use it to select the larger and the smaller byte. 'raddu.w.qb' then sums
// the four byte differences of a register into one word.
148static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
149  int temp0, temp1, temp2, temp3, temp4, temp5;
150  __asm__ volatile (
    // temp1 = per-byte max(b, c), temp2 = per-byte min(b, c).
151    "cmpgdu.lt.qb %[temp1], %[c],     %[b]             \n\t"
152    "pick.qb      %[temp1], %[b],     %[c]             \n\t"
153    "pick.qb      %[temp2], %[c],     %[b]             \n\t"
    // temp4 = per-byte max(a, c), temp5 = per-byte min(a, c).
154    "cmpgdu.lt.qb %[temp4], %[c],     %[a]             \n\t"
155    "pick.qb      %[temp4], %[a],     %[c]             \n\t"
156    "pick.qb      %[temp5], %[c],     %[a]             \n\t"
    // temp3 = Sbc, temp0 = Sac, then temp3 = Sbc - Sac.
157    "subu.qb      %[temp3], %[temp1], %[temp2]         \n\t"
158    "subu.qb      %[temp0], %[temp4], %[temp5]         \n\t"
159    "raddu.w.qb   %[temp3], %[temp3]                   \n\t"
160    "raddu.w.qb   %[temp0], %[temp0]                   \n\t"
161    "subu         %[temp3], %[temp3], %[temp0]         \n\t"
    // temp0 = (Sbc - Sac < 1); when that is false, 'a' is overwritten by 'b'.
162    "slti         %[temp0], %[temp3], 0x1              \n\t"
163    "movz         %[a],     %[b],     %[temp0]         \n\t"
164    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
165      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
166      [a]"+&r"(a)
167    : [b]"r"(b), [c]"r"(c)
168  );
169  return a;
170}
171
172static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
173  __asm__ volatile (
174    "adduh.qb    %[a0], %[a0], %[a1]       \n\t"
175    : [a0]"+r"(a0)
176    : [a1]"r"(a1)
177  );
178  return a0;
179}
180
181static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
182  return Average2(Average2(a0, a2), a1);
183}
184
185static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
186                                     uint32_t a2, uint32_t a3) {
187  return Average2(Average2(a0, a1), Average2(a2, a3));
188}
189
// Predictor 5: Average3 of 'left', top[0] and top[1].
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_next = top[1];
  return Average3(left, top_cur, top_next);
}
193
// Predictor 6: Average2 of 'left' and top[-1].
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  const uint32_t top_prev = top[-1];
  return Average2(left, top_prev);
}
197
// Predictor 7: Average2 of 'left' and top[0].
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  return Average2(left, top_cur);
}
201
// Predictor 8: Average2 of top[-1] and top[0]. 'left' is not used.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  const uint32_t top_prev = top[-1];
  const uint32_t top_cur = top[0];
  (void)left;
  return Average2(top_prev, top_cur);
}
206
// Predictor 9: Average2 of top[0] and top[1]. 'left' is not used.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_next = top[1];
  (void)left;
  return Average2(top_cur, top_next);
}
211
// Predictor 10: Average4 of 'left', top[-1], top[0] and top[1].
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  const uint32_t top_prev = top[-1];
  const uint32_t top_cur = top[0];
  const uint32_t top_next = top[1];
  return Average4(left, top_prev, top_cur, top_next);
}
215
// Predictor 11: Select() called with top[0], 'left' and top[-1], in that
// argument order.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_prev = top[-1];
  return Select(top_cur, left, top_prev);
}
219
// Predictor 12: ClampedAddSubtractFull() called with 'left', top[0] and
// top[-1], in that argument order.
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_prev = top[-1];
  return ClampedAddSubtractFull(left, top_cur, top_prev);
}
223
// Predictor 13: ClampedAddSubtractHalf() called with 'left', top[0] and
// top[-1], in that argument order.
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_prev = top[-1];
  return ClampedAddSubtractHalf(left, top_cur, top_prev);
}
227
228// Add green to blue and red channels (i.e. perform the inverse transform of
229// 'subtract green').
//
// Operates in place on 'num_pixels' 32-bit pixels starting at 'data'. For each
// pixel, bits 8..15 (green) are extracted, replicated into both 16-bit halves
// of a register (giving the value in bits 0..7 and bits 16..23), and added to
// the pixel with a per-byte add, so bits 0..7 and bits 16..23 each receive the
// green value modulo 256 while the other two bytes are unchanged.
//
// The first loop (label 0) handles four pixels per iteration and covers
// num_pixels & ~3 pixels; the second loop (label 1) handles the remaining
// pixels one at a time. The code is assembled with 'noreorder', so the
// instruction following each branch sits in its delay slot: the last store of
// each loop iteration is placed there.
230static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
231  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
232  uint32_t* const p_loop1_end = data + (num_pixels & ~3);
233  uint32_t* const p_loop2_end = data + num_pixels;
234  __asm__ volatile (
235    ".set       push                                          \n\t"
236    ".set       noreorder                                     \n\t"
    // Skip the 4-pixel loop entirely when there are fewer than four pixels.
237    "beq        %[data],         %[p_loop1_end],     3f       \n\t"
238    " nop                                                     \n\t"
239  "0:                                                         \n\t"
240    "lw         %[temp0],        0(%[data])                   \n\t"
241    "lw         %[temp1],        4(%[data])                   \n\t"
242    "lw         %[temp2],        8(%[data])                   \n\t"
243    "lw         %[temp3],        12(%[data])                  \n\t"
244    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
245    "ext        %[temp5],        %[temp1],           8,    8  \n\t"
246    "ext        %[temp6],        %[temp2],           8,    8  \n\t"
247    "ext        %[temp7],        %[temp3],           8,    8  \n\t"
248    "addiu      %[data],         %[data],            16       \n\t"
249    "replv.ph   %[temp4],        %[temp4]                     \n\t"
250    "replv.ph   %[temp5],        %[temp5]                     \n\t"
251    "replv.ph   %[temp6],        %[temp6]                     \n\t"
252    "replv.ph   %[temp7],        %[temp7]                     \n\t"
253    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
254    "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
255    "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
256    "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
    // 'data' was already advanced, hence the negative store offsets.
257    "sw         %[temp0],        -16(%[data])                 \n\t"
258    "sw         %[temp1],        -12(%[data])                 \n\t"
259    "sw         %[temp2],        -8(%[data])                  \n\t"
260    "bne        %[data],         %[p_loop1_end],     0b       \n\t"
261    " sw        %[temp3],        -4(%[data])                  \n\t"
262  "3:                                                         \n\t"
    // Tail: one pixel per iteration until p_loop2_end is reached.
263    "beq        %[data],         %[p_loop2_end],     2f       \n\t"
264    " nop                                                     \n\t"
265  "1:                                                         \n\t"
266    "lw         %[temp0],        0(%[data])                   \n\t"
267    "addiu      %[data],         %[data],            4        \n\t"
268    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
269    "replv.ph   %[temp4],        %[temp4]                     \n\t"
270    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
271    "bne        %[data],         %[p_loop2_end],     1b       \n\t"
272    " sw        %[temp0],        -4(%[data])                  \n\t"
273  "2:                                                         \n\t"
274    ".set       pop                                           \n\t"
275    : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
276      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
277      [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
278    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
279    : "memory"
280  );
281}
282
// Inverse color transform applied in place to 'num_pixels' 32-bit pixels at
// 'data', using the three multipliers read from 'm' (green_to_red_,
// green_to_blue_ and red_to_blue_).
//
// The assembly loop handles two pixels per iteration and covers
// num_pixels & ~1 pixels; a trailing odd pixel is handed to
// VP8LTransformColorInverse_C().
//
// Per pixel, with the multipliers and the green / new-red channel values
// sign-extended from 8 bits (the shll.ph / shra.ph by 8 pairs), the loop
// computes:
//   new_red  = red  + ((green * green_to_red) >> 5)
//   new_blue = blue + ((green * green_to_blue) >> 5)
//                   + ((new_red * red_to_blue) >> 5)
// Only the low 8 bits of new_red and new_blue are written back, using byte
// stores; the two other bytes of each pixel are never stored to.
// NOTE(review): the byte-store offsets (-2 / -6 for red, -4 / -8 for blue,
// relative to the already advanced 'data') correspond to a little-endian
// layout with blue in the lowest byte -- confirm the target byte order.
283static void TransformColorInverse(const VP8LMultipliers* const m,
284                                  uint32_t* data, int num_pixels) {
285  int temp0, temp1, temp2, temp3, temp4, temp5;
286  uint32_t argb, argb1, new_red;
287  const uint32_t G_to_R = m->green_to_red_;
288  const uint32_t G_to_B = m->green_to_blue_;
289  const uint32_t R_to_B = m->red_to_blue_;
290  uint32_t* const p_loop_end = data + (num_pixels & ~1);
291  __asm__ volatile (
292    ".set            push                                    \n\t"
293    ".set            noreorder                               \n\t"
    // Nothing to do in assembly when fewer than two pixels are present.
294    "beq             %[data],      %[p_loop_end],  1f        \n\t"
295    " nop                                                    \n\t"
    // Replicate each multiplier into both 16-bit lanes and sign-extend it
    // from 8 bits: temp0 = G_to_R, temp1 = G_to_B, temp2 = R_to_B.
296    "replv.ph        %[temp0],     %[G_to_R]                 \n\t"
297    "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
298    "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
299    "shll.ph         %[temp0],     %[temp0],       8         \n\t"
300    "shll.ph         %[temp1],     %[temp1],       8         \n\t"
301    "shll.ph         %[temp2],     %[temp2],       8         \n\t"
302    "shra.ph         %[temp0],     %[temp0],       8         \n\t"
303    "shra.ph         %[temp1],     %[temp1],       8         \n\t"
304    "shra.ph         %[temp2],     %[temp2],       8         \n\t"
305  "0:                                                        \n\t"
306    "lw              %[argb],      0(%[data])                \n\t"
307    "lw              %[argb1],     4(%[data])                \n\t"
308    "addiu           %[data],      %[data],        8         \n\t"
    // temp3 = the two green values as sign-extended 16-bit lanes.
309    "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
310    "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
311    "shll.ph         %[temp3],     %[temp3],       8         \n\t"
312    "shra.ph         %[temp3],     %[temp3],       8         \n\t"
313    "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"
314    "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"
    // new_red lanes = upper halfwords of both pixels; argb1 lanes = lower
    // halfwords of both pixels (first pixel in the upper lane).
315    "precrq.ph.w     %[new_red],   %[argb],        %[argb1]  \n\t"
316    "ins             %[argb1],     %[argb],        16,   16  \n\t"
317    "shra.ph         %[temp5],     %[temp5],       5         \n\t"
318    "shra.ph         %[temp3],     %[temp3],       5         \n\t"
319    "addu.ph         %[new_red],   %[new_red],     %[temp5]  \n\t"
320    "addu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
    // Red contribution to blue, based on the updated red value.
321    "preceu.ph.qbra  %[temp5],     %[new_red]                \n\t"
322    "shll.ph         %[temp4],     %[temp5],       8         \n\t"
323    "shra.ph         %[temp4],     %[temp4],       8         \n\t"
324    "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"
325    "sb              %[temp5],     -2(%[data])               \n\t"
326    "sra             %[temp5],     %[temp5],       16        \n\t"
327    "shra.ph         %[temp4],     %[temp4],       5         \n\t"
328    "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
329    "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
330    "sb              %[temp5],     -6(%[data])               \n\t"
331    "sb              %[temp3],     -4(%[data])               \n\t"
332    "sra             %[temp3],     %[temp3],       16        \n\t"
    // The final byte store executes in the branch delay slot.
333    "bne             %[data],      %[p_loop_end],  0b        \n\t"
334    " sb             %[temp3],     -8(%[data])               \n\t"
335  "1:                                                        \n\t"
336    ".set            pop                                     \n\t"
337    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
338      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
339      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
340      [argb1]"=&r"(argb1), [data]"+&r"(data)
341    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
342      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
343    : "memory", "hi", "lo"
344  );
345
346  // Fall-back to C-version for left-overs.
  // 'data' was advanced by the assembly loop, so it now points at the one
  // remaining pixel (if any).
347  if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
348}
349
// Converts 'num_pixels' 32-bit pixels read from 'src' into 3 bytes per pixel
// written to 'dst'.
//
// The first loop (label 0) consumes four pixels (16 input bytes) and emits
// 12 output bytes with three unaligned word stores per iteration; it covers
// num_pixels & ~3 pixels. The second loop (label 1) handles the remaining
// pixels one at a time: bits 16..23 of the pixel are stored as the first
// output byte, and the byte-swapped low halfword is stored as the next two.
// NOTE(review): the resulting memory order (bits 16..23, then 8..15, then
// 0..7, i.e. R, G, B going by the function name) assumes a little-endian
// target -- confirm against the generic C implementation.
//
// The code is assembled with 'noreorder'; the instruction after each branch
// executes in its delay slot.
350static void ConvertBGRAToRGB(const uint32_t* src,
351                             int num_pixels, uint8_t* dst) {
352  int temp0, temp1, temp2, temp3;
353  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
354  const uint32_t* const p_loop2_end = src + num_pixels;
355  __asm__ volatile (
356    ".set       push                                       \n\t"
357    ".set       noreorder                                  \n\t"
    // Skip the 4-pixel loop when there are fewer than four pixels.
358    "beq        %[src],      %[p_loop1_end],    3f         \n\t"
359    " nop                                                  \n\t"
360  "0:                                                      \n\t"
361    "lw         %[temp3],    12(%[src])                    \n\t"
362    "lw         %[temp2],    8(%[src])                     \n\t"
363    "lw         %[temp1],    4(%[src])                     \n\t"
364    "lw         %[temp0],    0(%[src])                     \n\t"
    // Repack the four 32-bit pixels into three 32-bit output words.
365    "ins        %[temp3],    %[temp2],          24,   8    \n\t"
366    "sll        %[temp2],    %[temp2],          8          \n\t"
367    "rotr       %[temp3],    %[temp3],          16         \n\t"
368    "ins        %[temp2],    %[temp1],          0,    16   \n\t"
369    "sll        %[temp1],    %[temp1],          8          \n\t"
370    "wsbh       %[temp3],    %[temp3]                      \n\t"
371    "balign     %[temp0],    %[temp1],          1          \n\t"
372    "wsbh       %[temp2],    %[temp2]                      \n\t"
373    "wsbh       %[temp0],    %[temp0]                      \n\t"
374    "usw        %[temp3],    8(%[dst])                     \n\t"
375    "rotr       %[temp0],    %[temp0],          16         \n\t"
376    "usw        %[temp2],    4(%[dst])                     \n\t"
377    "addiu      %[src],      %[src],            16         \n\t"
378    "usw        %[temp0],    0(%[dst])                     \n\t"
379    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
380    " addiu     %[dst],      %[dst],            12         \n\t"
381  "3:                                                      \n\t"
    // Tail: one pixel (3 output bytes) per iteration.
382    "beq        %[src],      %[p_loop2_end],    2f         \n\t"
383    " nop                                                  \n\t"
384  "1:                                                      \n\t"
385    "lw         %[temp0],    0(%[src])                     \n\t"
386    "addiu      %[src],      %[src],            4          \n\t"
387    "wsbh       %[temp1],    %[temp0]                      \n\t"
388    "addiu      %[dst],      %[dst],            3          \n\t"
389    "ush        %[temp1],    -2(%[dst])                    \n\t"
390    "sra        %[temp0],    %[temp0],          16         \n\t"
391    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
392    " sb        %[temp0],    -3(%[dst])                    \n\t"
393  "2:                                                      \n\t"
394    ".set       pop                                        \n\t"
395    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
396      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
397    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
398    : "memory"
399  );
400}
401
// Converts 'num_pixels' 32-bit pixels read from 'src' into 4 bytes per pixel
// written to 'dst' with unaligned word stores.
//
// Each pixel word goes through 'wsbh' (swap the two bytes inside each
// halfword) followed by 'balign' of the register with itself by one byte (a
// rotation by 8 bits). The net effect is that bits 0..7 and bits 16..23 of the
// word are exchanged while bits 8..15 and bits 24..31 stay in place.
//
// The first loop (label 0) handles four pixels per iteration and covers
// num_pixels & ~3 pixels; the second loop (label 1) handles the remaining
// pixels one at a time. With 'noreorder' in effect, the 'dst' increment of
// each loop sits in the branch delay slot.
402static void ConvertBGRAToRGBA(const uint32_t* src,
403                              int num_pixels, uint8_t* dst) {
404  int temp0, temp1, temp2, temp3;
405  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
406  const uint32_t* const p_loop2_end = src + num_pixels;
407  __asm__ volatile (
408    ".set       push                                       \n\t"
409    ".set       noreorder                                  \n\t"
    // Skip the 4-pixel loop when there are fewer than four pixels.
410    "beq        %[src],      %[p_loop1_end],    3f         \n\t"
411    " nop                                                  \n\t"
412  "0:                                                      \n\t"
413    "lw         %[temp0],    0(%[src])                     \n\t"
414    "lw         %[temp1],    4(%[src])                     \n\t"
415    "lw         %[temp2],    8(%[src])                     \n\t"
416    "lw         %[temp3],    12(%[src])                    \n\t"
417    "wsbh       %[temp0],    %[temp0]                      \n\t"
418    "wsbh       %[temp1],    %[temp1]                      \n\t"
419    "wsbh       %[temp2],    %[temp2]                      \n\t"
420    "wsbh       %[temp3],    %[temp3]                      \n\t"
421    "addiu      %[src],      %[src],            16         \n\t"
422    "balign     %[temp0],    %[temp0],          1          \n\t"
423    "balign     %[temp1],    %[temp1],          1          \n\t"
424    "balign     %[temp2],    %[temp2],          1          \n\t"
425    "balign     %[temp3],    %[temp3],          1          \n\t"
426    "usw        %[temp0],    0(%[dst])                     \n\t"
427    "usw        %[temp1],    4(%[dst])                     \n\t"
428    "usw        %[temp2],    8(%[dst])                     \n\t"
429    "usw        %[temp3],    12(%[dst])                    \n\t"
430    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
431    " addiu     %[dst],      %[dst],            16         \n\t"
432  "3:                                                      \n\t"
    // Tail: one pixel (4 output bytes) per iteration.
433    "beq        %[src],      %[p_loop2_end],    2f         \n\t"
434    " nop                                                  \n\t"
435  "1:                                                      \n\t"
436    "lw         %[temp0],    0(%[src])                     \n\t"
437    "wsbh       %[temp0],    %[temp0]                      \n\t"
438    "addiu      %[src],      %[src],            4          \n\t"
439    "balign     %[temp0],    %[temp0],          1          \n\t"
440    "usw        %[temp0],    0(%[dst])                     \n\t"
441    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
442    " addiu     %[dst],      %[dst],            4          \n\t"
443  "2:                                                      \n\t"
444    ".set       pop                                        \n\t"
445    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
446      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
447    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
448    : "memory"
449  );
450}
451
// Converts 'num_pixels' 32-bit pixels read from 'src' into 2 bytes per pixel
// written to 'dst', keeping the upper 4 bits of each 8-bit channel.
//
// For every pixel word the code builds two bytes:
//   - (bits 20..23 of the pixel) << 4 | (bits 12..15 of the pixel)
//   - (bits  4..7  of the pixel) << 4 | (bits 28..31 of the pixel)
// by inserting the high nibble of bits 8..15 into the low nibble of bits
// 16..23, and the high nibble of bits 24..31 into the low nibble of bits 0..7,
// then gathering those two bytes with 'precr.qb.ph'.
//
// When WEBP_SWAP_16BIT_CSP is not defined, an extra 'wsbh' swaps the two bytes
// of every 16-bit result before it is stored; when it is defined, the results
// are stored without that swap.
// NOTE(review): which of the two bytes ends up first in memory also depends
// on the target byte order -- confirm against the generic C implementation.
//
// The first loop (label 0) handles four pixels (8 output bytes) per iteration
// and covers num_pixels & ~3 pixels; the second loop (label 1) handles the
// remaining pixels one at a time. With 'noreorder' in effect, the 'dst'
// increment of each loop sits in the branch delay slot.
452static void ConvertBGRAToRGBA4444(const uint32_t* src,
453                                  int num_pixels, uint8_t* dst) {
454  int temp0, temp1, temp2, temp3, temp4, temp5;
455  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
456  const uint32_t* const p_loop2_end = src + num_pixels;
457  __asm__ volatile (
458    ".set           push                                       \n\t"
459    ".set           noreorder                                  \n\t"
    // Skip the 4-pixel loop when there are fewer than four pixels.
460    "beq            %[src],      %[p_loop1_end],    3f         \n\t"
461    " nop                                                      \n\t"
462  "0:                                                          \n\t"
463    "lw             %[temp0],    0(%[src])                     \n\t"
464    "lw             %[temp1],    4(%[src])                     \n\t"
465    "lw             %[temp2],    8(%[src])                     \n\t"
466    "lw             %[temp3],    12(%[src])                    \n\t"
    // temp4/temp5 are scratch registers holding the extracted high nibbles
    // of bits 24..31 and bits 8..15 of the pixel currently being packed.
467    "ext            %[temp4],    %[temp0],          28,   4    \n\t"
468    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
469    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
470    "ext            %[temp4],    %[temp1],          28,   4    \n\t"
471    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
472    "ext            %[temp5],    %[temp1],          12,   4    \n\t"
473    "ins            %[temp1],    %[temp4],          0,    4    \n\t"
474    "ext            %[temp4],    %[temp2],          28,   4    \n\t"
475    "ins            %[temp1],    %[temp5],          16,   4    \n\t"
476    "ext            %[temp5],    %[temp2],          12,   4    \n\t"
477    "ins            %[temp2],    %[temp4],          0,    4    \n\t"
478    "ext            %[temp4],    %[temp3],          28,   4    \n\t"
479    "ins            %[temp2],    %[temp5],          16,   4    \n\t"
480    "ext            %[temp5],    %[temp3],          12,   4    \n\t"
481    "ins            %[temp3],    %[temp4],          0,    4    \n\t"
    // Gather the packed bytes of two pixels into one output word each.
482    "precr.qb.ph    %[temp1],    %[temp1],          %[temp0]   \n\t"
483    "ins            %[temp3],    %[temp5],          16,   4    \n\t"
484    "addiu          %[src],      %[src],            16         \n\t"
485    "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
486#ifdef WEBP_SWAP_16BIT_CSP
487    "usw            %[temp1],    0(%[dst])                     \n\t"
488    "usw            %[temp3],    4(%[dst])                     \n\t"
489#else
490    "wsbh           %[temp1],    %[temp1]                      \n\t"
491    "wsbh           %[temp3],    %[temp3]                      \n\t"
492    "usw            %[temp1],    0(%[dst])                     \n\t"
493    "usw            %[temp3],    4(%[dst])                     \n\t"
494#endif
495    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
496    " addiu         %[dst],      %[dst],            8          \n\t"
497  "3:                                                          \n\t"
    // Tail: one pixel (2 output bytes) per iteration.
498    "beq            %[src],      %[p_loop2_end],    2f         \n\t"
499    " nop                                                      \n\t"
500  "1:                                                          \n\t"
501    "lw             %[temp0],    0(%[src])                     \n\t"
502    "ext            %[temp4],    %[temp0],          28,   4    \n\t"
503    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
504    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
505    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
506    "addiu          %[src],      %[src],            4          \n\t"
507    "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
508#ifdef WEBP_SWAP_16BIT_CSP
509    "ush            %[temp0],    0(%[dst])                     \n\t"
510#else
511    "wsbh           %[temp0],    %[temp0]                      \n\t"
512    "ush            %[temp0],    0(%[dst])                     \n\t"
513#endif
514    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
515    " addiu         %[dst],      %[dst],            2          \n\t"
516  "2:                                                          \n\t"
517    ".set           pop                                        \n\t"
518    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
519      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
520      [dst]"+&r"(dst), [src]"+&r"(src)
521    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
522    : "memory"
523  );
524}
525
// Converts 'num_pixels' 32-bit words read from 'src' into 16-bit values
// written to 'dst' (2 bytes per pixel, 2 * num_pixels bytes in total).
// For each source word, the 16-bit result is assembled as:
//   result[15..11] = word[23..19]
//   result[10..5]  = word[15..10]
//   result[4..0]   = word[7..3]
// i.e. the top 5/6/5 bits of the three low-order channels; bits 31..24 of the
// word are not used. Going by the function name these are R, G and B packed
// as RGB565.
// Unless WEBP_SWAP_16BIT_CSP is defined, the two bytes of every 16-bit result
// are swapped (wsbh) before the store.
// The first loop consumes four pixels per iteration, the second loop handles
// the remaining (num_pixels & 3) pixels one at a time. Both loops are skipped
// when their range is empty, so num_pixels == 0 performs no load or store.
526static void ConvertBGRAToRGB565(const uint32_t* src,
527                                int num_pixels, uint8_t* dst) {
528  int temp0, temp1, temp2, temp3, temp4, temp5;
  // End of the range handled four pixels at a time.
529  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  // End of the whole input; also the end of the one-pixel tail loop.
530  const uint32_t* const p_loop2_end = src + num_pixels;
531  __asm__ volatile (
532    ".set           push                                       \n\t"
    // noreorder: the instruction written right after each branch (indented by
    // one extra space) occupies its delay slot and runs whether or not the
    // branch is taken.
533    ".set           noreorder                                  \n\t"
    // Fewer than four pixels: go straight to the tail loop.
534    "beq            %[src],      %[p_loop1_end],    3f         \n\t"
535    " nop                                                      \n\t"
    // Main loop, four pixels per iteration. The per-pixel sequence is
    //   ext a, px, 8, 16   -> a = px[23..8]
    //   ext b, px, 5, 11   -> b = px[15..5]
    //   ext c, px, 3, 5    -> c = px[7..3]
    //   ins a, b, 0, 11    -> a[10..0] = b
    //   ins a, c, 0, 5     -> a[4..0]  = c
    // and the four sequences are interleaved. Results end up in:
    //   pixel 0 -> temp4, pixel 1 -> temp0, pixel 2 -> temp1, pixel 3 -> temp2
    // with temp5 used as scratch throughout.
536  "0:                                                          \n\t"
537    "lw             %[temp0],    0(%[src])                     \n\t"
538    "lw             %[temp1],    4(%[src])                     \n\t"
539    "lw             %[temp2],    8(%[src])                     \n\t"
540    "lw             %[temp3],    12(%[src])                    \n\t"
541    "ext            %[temp4],    %[temp0],          8,    16   \n\t"
542    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
543    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
544    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
545    "ext            %[temp5],    %[temp1],          5,    11   \n\t"
546    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
547    "ext            %[temp0],    %[temp1],          8,    16   \n\t"
548    "ext            %[temp1],    %[temp1],          3,    5    \n\t"
549    "ins            %[temp0],    %[temp5],          0,    11   \n\t"
550    "ext            %[temp5],    %[temp2],          5,    11   \n\t"
551    "ins            %[temp0],    %[temp1],          0,    5    \n\t"
552    "ext            %[temp1],    %[temp2],          8,    16   \n\t"
553    "ext            %[temp2],    %[temp2],          3,    5    \n\t"
554    "ins            %[temp1],    %[temp5],          0,    11   \n\t"
555    "ext            %[temp5],    %[temp3],          5,    11   \n\t"
556    "ins            %[temp1],    %[temp2],          0,    5    \n\t"
557    "ext            %[temp2],    %[temp3],          8,    16   \n\t"
558    "ext            %[temp3],    %[temp3],          3,    5    \n\t"
559    "ins            %[temp2],    %[temp5],          0,    11   \n\t"
    // temp0 = (pixel 1 result << 16) | pixel 0 result.
560    "append         %[temp0],    %[temp4],          16         \n\t"
561    "ins            %[temp2],    %[temp3],          0,    5    \n\t"
562    "addiu          %[src],      %[src],            16         \n\t"
    // temp2 = (pixel 3 result << 16) | pixel 2 result.
563    "append         %[temp2],    %[temp1],          16         \n\t"
    // Both configurations store the two packed words with usw; the default
    // one first swaps the bytes inside each 16-bit half.
564#ifdef WEBP_SWAP_16BIT_CSP
565    "usw            %[temp0],    0(%[dst])                     \n\t"
566    "usw            %[temp2],    4(%[dst])                     \n\t"
567#else
568    "wsbh           %[temp0],    %[temp0]                      \n\t"
569    "wsbh           %[temp2],    %[temp2]                      \n\t"
570    "usw            %[temp0],    0(%[dst])                     \n\t"
571    "usw            %[temp2],    4(%[dst])                     \n\t"
572#endif
573    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
    // Delay slot: dst advances by 8 bytes on every iteration, last included.
574    " addiu         %[dst],      %[dst],            8          \n\t"
575  "3:                                                          \n\t"
    // No leftover pixels: done.
576    "beq            %[src],      %[p_loop2_end],    2f         \n\t"
577    " nop                                                      \n\t"
    // Tail loop, one pixel per iteration; same bit packing, result in temp4.
578  "1:                                                          \n\t"
579    "lw             %[temp0],    0(%[src])                     \n\t"
580    "ext            %[temp4],    %[temp0],          8,    16   \n\t"
581    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
582    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
583    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
584    "addiu          %[src],      %[src],            4          \n\t"
585    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
586#ifdef WEBP_SWAP_16BIT_CSP
587    "ush            %[temp4],    0(%[dst])                     \n\t"
588#else
589    "wsbh           %[temp4],    %[temp4]                      \n\t"
590    "ush            %[temp4],    0(%[dst])                     \n\t"
591#endif
592    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
    // Delay slot: dst advances by 2 bytes on every iteration.
593    " addiu         %[dst],      %[dst],            2          \n\t"
594  "2:                                                          \n\t"
595    ".set           pop                                        \n\t"
    // All temporaries are early-clobber outputs; src and dst are read-write
    // because the asm advances them.
596    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
597      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
598      [dst]"+&r"(dst), [src]"+&r"(src)
599    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    // "memory": the asm reads *src and writes *dst behind the compiler's back.
600    : "memory"
601  );
602}
603
// Converts 'num_pixels' 32-bit words read from 'src' into 3 bytes per pixel
// written to 'dst' (3 * num_pixels bytes in total): bits 23..0 of every word
// are kept and bits 31..24 are dropped.
// The first loop turns four source words into three packed output words per
// iteration; the second loop handles the remaining (num_pixels & 3) pixels one
// at a time. Both loops are skipped when their range is empty, so
// num_pixels == 0 performs no load or store.
// NOTE(review): the byte order that ends up in 'dst' follows from word and
// halfword stores of packed registers, so it depends on the target's
// endianness; the packing matches a little-endian layout (lowest byte of each
// pixel first) - confirm for big-endian builds.
604static void ConvertBGRAToBGR(const uint32_t* src,
605                             int num_pixels, uint8_t* dst) {
606  int temp0, temp1, temp2, temp3;
  // End of the range handled four pixels at a time.
607  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  // End of the whole input; also the end of the one-pixel tail loop.
608  const uint32_t* const p_loop2_end = src + num_pixels;
609  __asm__ volatile (
610    ".set       push                                         \n\t"
    // noreorder: the instruction written right after each branch (indented by
    // one extra space) occupies its delay slot and runs whether or not the
    // branch is taken.
611    ".set       noreorder                                    \n\t"
    // Fewer than four pixels: go straight to the tail loop.
612    "beq        %[src],      %[p_loop1_end],    3f           \n\t"
613    " nop                                                    \n\t"
    // Main loop: pixels p0..p3 are loaded into temp0..temp3.
614  "0:                                                        \n\t"
615    "lw         %[temp0],    0(%[src])                       \n\t"
616    "lw         %[temp1],    4(%[src])                       \n\t"
617    "lw         %[temp2],    8(%[src])                       \n\t"
618    "lw         %[temp3],    12(%[src])                      \n\t"
    // Word 0: top byte of p0 replaced by the lowest byte of p1
    //   -> temp0 = [p1[7..0] | p0[23..0]].
619    "ins        %[temp0],    %[temp1],          24,    8     \n\t"
    // Word 1: drop the byte of p1 already used, then put the low 16 bits of p2
    // in the upper half -> temp1 = [p2[15..0] | p1[23..8]].
620    "sra        %[temp1],    %[temp1],          8            \n\t"
621    "ins        %[temp1],    %[temp2],          16,    16    \n\t"
    // Word 2: move p2[23..16] to the top byte of temp2, then balign shifts
    // temp3 left by one byte and fills its low byte from that top byte
    //   -> temp3 = [p3[23..0] | p2[23..16]].
622    "sll        %[temp2],    %[temp2],          8            \n\t"
623    "balign     %[temp3],    %[temp2],          1            \n\t"
624    "addiu      %[src],      %[src],            16           \n\t"
625    "usw        %[temp0],    0(%[dst])                       \n\t"
626    "usw        %[temp1],    4(%[dst])                       \n\t"
627    "usw        %[temp3],    8(%[dst])                       \n\t"
628    "bne        %[src],      %[p_loop1_end],    0b           \n\t"
    // Delay slot: dst advances by 12 bytes on every iteration, last included.
629    " addiu     %[dst],      %[dst],            12           \n\t"
630  "3:                                                        \n\t"
    // No leftover pixels: done.
631    "beq        %[src],      %[p_loop2_end],    2f           \n\t"
632    " nop                                                    \n\t"
    // Tail loop, one pixel per iteration. dst is advanced first, so the stores
    // below address the current pixel with negative offsets.
633  "1:                                                        \n\t"
634    "lw         %[temp0],    0(%[src])                       \n\t"
635    "addiu      %[src],      %[src],            4            \n\t"
636    "addiu      %[dst],      %[dst],            3            \n\t"
    // Low 16 bits of the pixel go to the first two output bytes.
637    "ush        %[temp0],    -3(%[dst])                      \n\t"
    // Bring bits 23..16 down to the low byte for the final byte store.
638    "sra        %[temp0],    %[temp0],          16           \n\t"
639    "bne        %[src],      %[p_loop2_end],    1b           \n\t"
    // Delay slot: third output byte, stored on every iteration.
640    " sb        %[temp0],    -1(%[dst])                      \n\t"
641  "2:                                                        \n\t"
642    ".set       pop                                          \n\t"
    // All temporaries are early-clobber outputs; src and dst are read-write
    // because the asm advances them.
643    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
644      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
645    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    // "memory": the asm reads *src and writes *dst behind the compiler's back.
646    : "memory"
647  );
648}
649
650//------------------------------------------------------------------------------
651// Entry point
652
// Prototype for the definition that follows.
653extern void VP8LDspInitMIPSdspR2(void);
654
// Points the VP8L dispatch pointers and entries 5..13 of VP8LPredictors at the
// routines named on the right-hand side. ConvertBGRAToRGB565 and
// ConvertBGRAToBGR are defined just above; the other routines are expected to
// be defined earlier in this file. No other VP8LPredictors entry is written
// here.
// NOTE(review): WEBP_TSAN_IGNORE_FUNCTION is a macro defined outside this
// file; presumably it keeps ThreadSanitizer from instrumenting this function -
// confirm in dsp.h.
655WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
  // Color-map lookups.
656  VP8LMapColor32b = MapARGB;
657  VP8LMapColor8b = MapAlpha;
  // Predictor table entries 5 through 13.
658  VP8LPredictors[5] = Predictor5;
659  VP8LPredictors[6] = Predictor6;
660  VP8LPredictors[7] = Predictor7;
661  VP8LPredictors[8] = Predictor8;
662  VP8LPredictors[9] = Predictor9;
663  VP8LPredictors[10] = Predictor10;
664  VP8LPredictors[11] = Predictor11;
665  VP8LPredictors[12] = Predictor12;
666  VP8LPredictors[13] = Predictor13;
  // Inverse transforms.
667  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
668  VP8LTransformColorInverse = TransformColorInverse;
  // Output color-space conversions.
669  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
670  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
671  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
672  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
673  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
674}
675
676#else  // !WEBP_USE_MIPS_DSP_R2
677
// Builds without WEBP_USE_MIPS_DSP_R2 take this branch instead of the code
// above. NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file;
// presumably it expands to an empty VP8LDspInitMIPSdspR2() so the symbol still
// exists - confirm in dsp.h.
678WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
679
680#endif  // WEBP_USE_MIPS_DSP_R2
681