dec_mips32.c revision af51b94a435132e9014c324e25fb686b3d07a8c8
1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// MIPS version of dsp functions
11//
12// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
13//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15#include "./dsp.h"
16
17#if defined(WEBP_USE_MIPS32)
18
// Fixed-point multipliers for the inverse transform below. Each product is
// shifted right by 16 bits after the multiply, so kC1 scales a value by
// (1 + 20091 / 65536) and kC2 scales it by (35468 / 65536).
static const int kC1 = (1 << 16) + 20091;
static const int kC2 = 35468;
21
22static WEBP_INLINE int abs_mips32(int x) {
23  const int sign = x >> 31;
24  return (x ^ sign) - sign;
25}
26
27// 4 pixels in, 2 pixels out
28static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
29  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
30  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
31  const int a1 = VP8ksclip2[(a + 4) >> 3];
32  const int a2 = VP8ksclip2[(a + 3) >> 3];
33  p[-step] = VP8kclip1[p0 + a2];
34  p[    0] = VP8kclip1[q0 - a1];
35}
36
37// 4 pixels in, 4 pixels out
38static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
39  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
40  const int a = 3 * (q0 - p0);
41  const int a1 = VP8ksclip2[(a + 4) >> 3];
42  const int a2 = VP8ksclip2[(a + 3) >> 3];
43  const int a3 = (a1 + 1) >> 1;
44  p[-2 * step] = VP8kclip1[p1 + a3];
45  p[-    step] = VP8kclip1[p0 + a2];
46  p[        0] = VP8kclip1[q0 - a1];
47  p[     step] = VP8kclip1[q1 - a3];
48}
49
50// 6 pixels in, 6 pixels out
51static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
52  const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
53  const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
54  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
55  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
56  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
57  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
58  p[-3 * step] = VP8kclip1[p2 + a3];
59  p[-2 * step] = VP8kclip1[p1 + a2];
60  p[-    step] = VP8kclip1[p0 + a1];
61  p[        0] = VP8kclip1[q0 - a1];
62  p[     step] = VP8kclip1[q1 - a2];
63  p[ 2 * step] = VP8kclip1[q2 - a3];
64}
65
66static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
67  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
68  return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
69}
70
71static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
72  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
73  return ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) <= thresh);
74}
75
76static WEBP_INLINE int needs_filter2(const uint8_t* p,
77                                     int step, int t, int it) {
78  const int p3 = p[-4 * step], p2 = p[-3 * step];
79  const int p1 = p[-2 * step], p0 = p[-step];
80  const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
81  if ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) > t) {
82    return 0;
83  }
84  return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
85         abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
86         abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
87}
88
89static WEBP_INLINE void FilterLoop26(uint8_t* p,
90                                     int hstride, int vstride, int size,
91                                     int thresh, int ithresh, int hev_thresh) {
92  while (size-- > 0) {
93    if (needs_filter2(p, hstride, thresh, ithresh)) {
94      if (hev(p, hstride, hev_thresh)) {
95        do_filter2(p, hstride);
96      } else {
97        do_filter6(p, hstride);
98      }
99    }
100    p += vstride;
101  }
102}
103
104static WEBP_INLINE void FilterLoop24(uint8_t* p,
105                                     int hstride, int vstride, int size,
106                                     int thresh, int ithresh, int hev_thresh) {
107  while (size-- > 0) {
108    if (needs_filter2(p, hstride, thresh, ithresh)) {
109      if (hev(p, hstride, hev_thresh)) {
110        do_filter2(p, hstride);
111      } else {
112        do_filter4(p, hstride);
113      }
114    }
115    p += vstride;
116  }
117}
118
// Macroblock-edge filter, 16 positions wide: samples are taken 'stride' bytes
// apart and consecutive positions are 1 byte apart.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int sample_step = stride;
  const int position_step = 1;
  const int num_positions = 16;
  FilterLoop26(p, sample_step, position_step, num_positions,
               thresh, ithresh, hev_thresh);
}
124
// Macroblock-edge filter, 16 positions tall: samples are taken 1 byte apart
// and consecutive positions are 'stride' bytes apart.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int sample_step = 1;
  const int position_step = stride;
  const int num_positions = 16;
  FilterLoop26(p, sample_step, position_step, num_positions,
               thresh, ithresh, hev_thresh);
}
129
// 8-position variant used for chroma: applies the same edge filter to the 'u'
// plane and then to the 'v' plane. Samples are 'stride' bytes apart,
// consecutive positions 1 byte apart.
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  const int sample_step = stride;
  const int position_step = 1;
  const int num_positions = 8;
  FilterLoop26(u, sample_step, position_step, num_positions,
               thresh, ithresh, hev_thresh);
  FilterLoop26(v, sample_step, position_step, num_positions,
               thresh, ithresh, hev_thresh);
}
136
// 8-position edge filter applied to the 'u' plane and then to the 'v' plane.
// Samples are 1 byte apart, consecutive positions 'stride' bytes apart.
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  const int sample_step = 1;
  const int position_step = stride;
  const int num_positions = 8;
  FilterLoop26(u, sample_step, position_step, num_positions,
               thresh, ithresh, hev_thresh);
  FilterLoop26(v, sample_step, position_step, num_positions,
               thresh, ithresh, hev_thresh);
}
142
// Inner-edge filter for the 'u' and 'v' planes: processes the edge located
// four rows ('4 * stride' bytes) past each plane pointer, 8 positions wide,
// with samples 'stride' bytes apart.
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int edge_offset = 4 * stride;
  const int num_positions = 8;
  FilterLoop24(u + edge_offset, stride, 1, num_positions,
               thresh, ithresh, hev_thresh);
  FilterLoop24(v + edge_offset, stride, 1, num_positions,
               thresh, ithresh, hev_thresh);
}
148
// Inner-edge filter for the 'u' and 'v' planes: processes the edge located
// four bytes past each plane pointer, 8 positions tall ('stride' bytes apart),
// with samples 1 byte apart.
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int edge_offset = 4;
  const int num_positions = 8;
  FilterLoop24(u + edge_offset, 1, stride, num_positions,
               thresh, ithresh, hev_thresh);
  FilterLoop24(v + edge_offset, 1, stride, num_positions,
               thresh, ithresh, hev_thresh);
}
154
// Filters the three inner edges located 4, 8 and 12 rows below 'p', each one
// 16 positions wide, with samples 'stride' bytes apart.
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    uint8_t* const edge_start = p + 4 * edge * stride;
    FilterLoop24(edge_start, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
164
// Filters the three inner edges located 4, 8 and 12 bytes to the right of
// 'p', each one 16 positions tall ('stride' bytes apart), with samples 1 byte
// apart.
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    uint8_t* const edge_start = p + 4 * edge;
    FilterLoop24(edge_start, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
173
174//------------------------------------------------------------------------------
175// Simple In-loop filtering (Paragraph 15.2)
176
// Simple filter over 16 consecutive bytes starting at 'p': each position that
// passes needs_filter() is processed by do_filter2() with samples 'stride'
// bytes apart.
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  uint8_t* const end = p + 16;
  for (; p != end; ++p) {
    if (!needs_filter(p, stride, thresh)) continue;
    do_filter2(p, stride);
  }
}
185
// Simple filter over 16 positions spaced 'stride' bytes apart starting at 'p':
// each position that passes needs_filter() is processed by do_filter2() with
// samples 1 byte apart.
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  int row = 0;
  while (row < 16) {
    uint8_t* const pos = p + row * stride;
    if (needs_filter(pos, 1, thresh)) {
      do_filter2(pos, 1);
    }
    ++row;
  }
}
194
// Runs SimpleVFilter16() on the three inner edges located 4, 8 and 12 rows
// below 'p'.
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    SimpleVFilter16(p + 4 * edge * stride, stride, thresh);
  }
}
202
// Runs SimpleHFilter16() on the three inner edges located 4, 8 and 12 bytes
// to the right of 'p'.
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    SimpleHFilter16(p + 4 * edge, stride, thresh);
  }
}
210
211static void TransformOne(const int16_t* in, uint8_t* dst) {
212  int temp0, temp1, temp2, temp3, temp4;
213  int temp5, temp6, temp7, temp8, temp9;
214  int temp10, temp11, temp12, temp13, temp14;
215  int temp15, temp16, temp17, temp18;
216  int16_t* p_in = (int16_t*)in;
217
218  // loops unrolled and merged to avoid usage of tmp buffer
219  // and to reduce number of stalls. MUL macro is written
220  // in assembler and inlined
221  __asm__ volatile(
222    "lh       %[temp0],  0(%[in])                      \n\t"
223    "lh       %[temp8],  16(%[in])                     \n\t"
224    "lh       %[temp4],  8(%[in])                      \n\t"
225    "lh       %[temp12], 24(%[in])                     \n\t"
226    "addu     %[temp16], %[temp0],  %[temp8]           \n\t"
227    "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"
228    "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"
229    "mul      %[temp17], %[temp12], %[kC1]             \n\t"
230    "mul      %[temp4],  %[temp4],  %[kC1]             \n\t"
231    "mul      %[temp12], %[temp12], %[kC2]             \n\t"
232    "lh       %[temp1],  2(%[in])                      \n\t"
233    "lh       %[temp5],  10(%[in])                     \n\t"
234    "lh       %[temp9],  18(%[in])                     \n\t"
235    "lh       %[temp13], 26(%[in])                     \n\t"
236    "sra      %[temp8],  %[temp8],  16                 \n\t"
237    "sra      %[temp17], %[temp17], 16                 \n\t"
238    "sra      %[temp4],  %[temp4],  16                 \n\t"
239    "sra      %[temp12], %[temp12], 16                 \n\t"
240    "lh       %[temp2],  4(%[in])                      \n\t"
241    "lh       %[temp6],  12(%[in])                     \n\t"
242    "lh       %[temp10], 20(%[in])                     \n\t"
243    "lh       %[temp14], 28(%[in])                     \n\t"
244    "subu     %[temp17], %[temp8],  %[temp17]          \n\t"
245    "addu     %[temp4],  %[temp4],  %[temp12]          \n\t"
246    "addu     %[temp8],  %[temp16], %[temp4]           \n\t"
247    "subu     %[temp4],  %[temp16], %[temp4]           \n\t"
248    "addu     %[temp16], %[temp1],  %[temp9]           \n\t"
249    "subu     %[temp1],  %[temp1],  %[temp9]           \n\t"
250    "lh       %[temp3],  6(%[in])                      \n\t"
251    "lh       %[temp7],  14(%[in])                     \n\t"
252    "lh       %[temp11], 22(%[in])                     \n\t"
253    "lh       %[temp15], 30(%[in])                     \n\t"
254    "addu     %[temp12], %[temp0],  %[temp17]          \n\t"
255    "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"
256    "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
257    "mul      %[temp17], %[temp13], %[kC1]             \n\t"
258    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
259    "mul      %[temp13], %[temp13], %[kC2]             \n\t"
260    "sra      %[temp9],  %[temp9],  16                 \n\t"
261    "sra      %[temp17], %[temp17], 16                 \n\t"
262    "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
263    "sra      %[temp5],  %[temp5],  16                 \n\t"
264    "sra      %[temp13], %[temp13], 16                 \n\t"
265    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
266    "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
267    "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
268    "mul      %[temp17], %[temp14], %[kC1]             \n\t"
269    "mul      %[temp14], %[temp14], %[kC2]             \n\t"
270    "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
271    "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
272    "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
273    "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
274    "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
275    "mul      %[temp6],  %[temp6],  %[kC1]             \n\t"
276    "sra      %[temp17], %[temp17], 16                 \n\t"
277    "sra      %[temp14], %[temp14], 16                 \n\t"
278    "sra      %[temp10], %[temp10], 16                 \n\t"
279    "sra      %[temp6],  %[temp6],  16                 \n\t"
280    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
281    "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
282    "addu     %[temp10], %[temp16], %[temp6]           \n\t"
283    "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
284    "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
285    "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
286    "mul      %[temp17], %[temp15], %[kC1]             \n\t"
287    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
288    "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
289    "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
290    "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
291    "mul      %[temp7],  %[temp7],  %[kC1]             \n\t"
292    "addiu    %[temp8],  %[temp8],  4                  \n\t"
293    "addiu    %[temp12], %[temp12], 4                  \n\t"
294    "addiu    %[temp0],  %[temp0],  4                  \n\t"
295    "addiu    %[temp4],  %[temp4],  4                  \n\t"
296    "sra      %[temp17], %[temp17], 16                 \n\t"
297    "sra      %[temp15], %[temp15], 16                 \n\t"
298    "sra      %[temp11], %[temp11], 16                 \n\t"
299    "sra      %[temp7],  %[temp7],  16                 \n\t"
300    "subu     %[temp17], %[temp11], %[temp17]          \n\t"
301    "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
302    "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
303    "subu     %[temp3],  %[temp3],  %[temp17]          \n\t"
304    "addu     %[temp11], %[temp16], %[temp7]           \n\t"
305    "subu     %[temp7],  %[temp16], %[temp7]           \n\t"
306    "addu     %[temp16], %[temp8],  %[temp10]          \n\t"
307    "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
308    "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
309    "mul      %[temp17], %[temp11], %[kC1]             \n\t"
310    "mul      %[temp9],  %[temp9],  %[kC1]             \n\t"
311    "mul      %[temp11], %[temp11], %[kC2]             \n\t"
312    "sra      %[temp10], %[temp10], 16                 \n\t"
313    "sra      %[temp17], %[temp17], 16                 \n\t"
314    "sra      %[temp9],  %[temp9],  16                 \n\t"
315    "sra      %[temp11], %[temp11], 16                 \n\t"
316    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
317    "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
318    "addu     %[temp10], %[temp12], %[temp14]          \n\t"
319    "subu     %[temp12], %[temp12], %[temp14]          \n\t"
320    "mul      %[temp14], %[temp13], %[kC2]             \n\t"
321    "mul      %[temp9],  %[temp15], %[kC1]             \n\t"
322    "mul      %[temp13], %[temp13], %[kC1]             \n\t"
323    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
324    "sra      %[temp14], %[temp14], 16                 \n\t"
325    "sra      %[temp9],  %[temp9],  16                 \n\t"
326    "sra      %[temp13], %[temp13], 16                 \n\t"
327    "sra      %[temp15], %[temp15], 16                 \n\t"
328    "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
329    "addu     %[temp15], %[temp13], %[temp15]          \n\t"
330    "addu     %[temp14], %[temp0],  %[temp2]           \n\t"
331    "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
332    "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
333    "mul      %[temp13], %[temp3],  %[kC1]             \n\t"
334    "mul      %[temp1],  %[temp1],  %[kC1]             \n\t"
335    "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
336    "sra      %[temp2],  %[temp2],  16                 \n\t"
337    "sra      %[temp13], %[temp13], 16                 \n\t"
338    "sra      %[temp1],  %[temp1],  16                 \n\t"
339    "sra      %[temp3],  %[temp3],  16                 \n\t"
340    "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
341    "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
342    "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"
343    "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
344    "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
345    "mul      %[temp1],  %[temp7],  %[kC1]             \n\t"
346    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
347    "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
348    "sra      %[temp6],  %[temp6],  16                 \n\t"
349    "sra      %[temp1],  %[temp1],  16                 \n\t"
350    "sra      %[temp5],  %[temp5],  16                 \n\t"
351    "sra      %[temp7],  %[temp7],  16                 \n\t"
352    "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
353    "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
354    "addu     %[temp5],  %[temp16], %[temp11]          \n\t"
355    "subu     %[temp16], %[temp16], %[temp11]          \n\t"
356    "addu     %[temp11], %[temp8],  %[temp17]          \n\t"
357    "subu     %[temp8],  %[temp8],  %[temp17]          \n\t"
358    "sra      %[temp5],  %[temp5],  3                  \n\t"
359    "sra      %[temp16], %[temp16], 3                  \n\t"
360    "sra      %[temp11], %[temp11], 3                  \n\t"
361    "sra      %[temp8],  %[temp8],  3                  \n\t"
362    "addu     %[temp17], %[temp10], %[temp15]          \n\t"
363    "subu     %[temp10], %[temp10], %[temp15]          \n\t"
364    "addu     %[temp15], %[temp12], %[temp9]           \n\t"
365    "subu     %[temp12], %[temp12], %[temp9]           \n\t"
366    "sra      %[temp17], %[temp17], 3                  \n\t"
367    "sra      %[temp10], %[temp10], 3                  \n\t"
368    "sra      %[temp15], %[temp15], 3                  \n\t"
369    "sra      %[temp12], %[temp12], 3                  \n\t"
370    "addu     %[temp9],  %[temp14], %[temp3]           \n\t"
371    "subu     %[temp14], %[temp14], %[temp3]           \n\t"
372    "addu     %[temp3],  %[temp0],  %[temp13]          \n\t"
373    "subu     %[temp0],  %[temp0],  %[temp13]          \n\t"
374    "sra      %[temp9],  %[temp9],  3                  \n\t"
375    "sra      %[temp14], %[temp14], 3                  \n\t"
376    "sra      %[temp3],  %[temp3],  3                  \n\t"
377    "sra      %[temp0],  %[temp0],  3                  \n\t"
378    "addu     %[temp13], %[temp2],  %[temp7]           \n\t"
379    "subu     %[temp2],  %[temp2],  %[temp7]           \n\t"
380    "addu     %[temp7],  %[temp4],  %[temp1]           \n\t"
381    "subu     %[temp4],  %[temp4],  %[temp1]           \n\t"
382    "sra      %[temp13], %[temp13], 3                  \n\t"
383    "sra      %[temp2],  %[temp2],  3                  \n\t"
384    "sra      %[temp7],  %[temp7],  3                  \n\t"
385    "sra      %[temp4],  %[temp4],  3                  \n\t"
386    "addiu    %[temp6],  $zero,     255                \n\t"
387    "lbu      %[temp1],  0(%[dst])                     \n\t"
388    "addu     %[temp1],  %[temp1],  %[temp5]           \n\t"
389    "sra      %[temp5],  %[temp1],  8                  \n\t"
390    "sra      %[temp18], %[temp1],  31                 \n\t"
391    "beqz     %[temp5],  1f                            \n\t"
392    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
393    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
394  "1:                                                  \n\t"
395    "lbu      %[temp18], 1(%[dst])                     \n\t"
396    "sb       %[temp1],  0(%[dst])                     \n\t"
397    "addu     %[temp18], %[temp18], %[temp11]          \n\t"
398    "sra      %[temp11], %[temp18], 8                  \n\t"
399    "sra      %[temp1],  %[temp18], 31                 \n\t"
400    "beqz     %[temp11], 2f                            \n\t"
401    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
402    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
403  "2:                                                  \n\t"
404    "lbu      %[temp1],  2(%[dst])                     \n\t"
405    "sb       %[temp18], 1(%[dst])                     \n\t"
406    "addu     %[temp1],  %[temp1],  %[temp8]           \n\t"
407    "sra      %[temp8],  %[temp1],  8                  \n\t"
408    "sra      %[temp18], %[temp1],  31                 \n\t"
409    "beqz     %[temp8],  3f                            \n\t"
410    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
411    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
412  "3:                                                  \n\t"
413    "lbu      %[temp18], 3(%[dst])                     \n\t"
414    "sb       %[temp1],  2(%[dst])                     \n\t"
415    "addu     %[temp18], %[temp18], %[temp16]          \n\t"
416    "sra      %[temp16], %[temp18], 8                  \n\t"
417    "sra      %[temp1],  %[temp18], 31                 \n\t"
418    "beqz     %[temp16], 4f                            \n\t"
419    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
420    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
421  "4:                                                  \n\t"
422    "sb       %[temp18], 3(%[dst])                     \n\t"
423    "lbu      %[temp5],  32(%[dst])                    \n\t"
424    "lbu      %[temp8],  33(%[dst])                    \n\t"
425    "lbu      %[temp11], 34(%[dst])                    \n\t"
426    "lbu      %[temp16], 35(%[dst])                    \n\t"
427    "addu     %[temp5],  %[temp5],  %[temp17]          \n\t"
428    "addu     %[temp8],  %[temp8],  %[temp15]          \n\t"
429    "addu     %[temp11], %[temp11], %[temp12]          \n\t"
430    "addu     %[temp16], %[temp16], %[temp10]          \n\t"
431    "sra      %[temp18], %[temp5],  8                  \n\t"
432    "sra      %[temp1],  %[temp5],  31                 \n\t"
433    "beqz     %[temp18], 5f                            \n\t"
434    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
435    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
436  "5:                                                  \n\t"
437    "sra      %[temp18], %[temp8],  8                  \n\t"
438    "sra      %[temp1],  %[temp8],  31                 \n\t"
439    "beqz     %[temp18], 6f                            \n\t"
440    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
441    "movz     %[temp8],  %[temp6],  %[temp1]           \n\t"
442  "6:                                                  \n\t"
443    "sra      %[temp18], %[temp11], 8                  \n\t"
444    "sra      %[temp1],  %[temp11], 31                 \n\t"
445    "sra      %[temp17], %[temp16], 8                  \n\t"
446    "sra      %[temp15], %[temp16], 31                 \n\t"
447    "beqz     %[temp18], 7f                            \n\t"
448    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
449    "movz     %[temp11], %[temp6],  %[temp1]           \n\t"
450  "7:                                                  \n\t"
451    "beqz     %[temp17], 8f                            \n\t"
452    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
453    "movz     %[temp16], %[temp6],  %[temp15]          \n\t"
454  "8:                                                  \n\t"
455    "sb       %[temp5],  32(%[dst])                    \n\t"
456    "sb       %[temp8],  33(%[dst])                    \n\t"
457    "sb       %[temp11], 34(%[dst])                    \n\t"
458    "sb       %[temp16], 35(%[dst])                    \n\t"
459    "lbu      %[temp5],  64(%[dst])                    \n\t"
460    "lbu      %[temp8],  65(%[dst])                    \n\t"
461    "lbu      %[temp11], 66(%[dst])                    \n\t"
462    "lbu      %[temp16], 67(%[dst])                    \n\t"
463    "addu     %[temp5],  %[temp5],  %[temp9]           \n\t"
464    "addu     %[temp8],  %[temp8],  %[temp3]           \n\t"
465    "addu     %[temp11], %[temp11], %[temp0]           \n\t"
466    "addu     %[temp16], %[temp16], %[temp14]          \n\t"
467    "sra      %[temp18], %[temp5],  8                  \n\t"
468    "sra      %[temp1],  %[temp5],  31                 \n\t"
469    "sra      %[temp17], %[temp8],  8                  \n\t"
470    "sra      %[temp15], %[temp8],  31                 \n\t"
471    "sra      %[temp12], %[temp11], 8                  \n\t"
472    "sra      %[temp10], %[temp11], 31                 \n\t"
473    "sra      %[temp9],  %[temp16], 8                  \n\t"
474    "sra      %[temp3],  %[temp16], 31                 \n\t"
475    "beqz     %[temp18], 9f                            \n\t"
476    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
477    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
478  "9:                                                  \n\t"
479    "beqz     %[temp17], 10f                           \n\t"
480    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
481    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
482  "10:                                                 \n\t"
483    "beqz     %[temp12], 11f                           \n\t"
484    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
485    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
486  "11:                                                 \n\t"
487    "beqz     %[temp9],  12f                           \n\t"
488    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
489    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
490  "12:                                                 \n\t"
491    "sb       %[temp5],  64(%[dst])                    \n\t"
492    "sb       %[temp8],  65(%[dst])                    \n\t"
493    "sb       %[temp11], 66(%[dst])                    \n\t"
494    "sb       %[temp16], 67(%[dst])                    \n\t"
495    "lbu      %[temp5],  96(%[dst])                    \n\t"
496    "lbu      %[temp8],  97(%[dst])                    \n\t"
497    "lbu      %[temp11], 98(%[dst])                    \n\t"
498    "lbu      %[temp16], 99(%[dst])                    \n\t"
499    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
500    "addu     %[temp8],  %[temp8],  %[temp7]           \n\t"
501    "addu     %[temp11], %[temp11], %[temp4]           \n\t"
502    "addu     %[temp16], %[temp16], %[temp2]           \n\t"
503    "sra      %[temp18], %[temp5],  8                  \n\t"
504    "sra      %[temp1],  %[temp5],  31                 \n\t"
505    "sra      %[temp17], %[temp8],  8                  \n\t"
506    "sra      %[temp15], %[temp8],  31                 \n\t"
507    "sra      %[temp12], %[temp11], 8                  \n\t"
508    "sra      %[temp10], %[temp11], 31                 \n\t"
509    "sra      %[temp9],  %[temp16], 8                  \n\t"
510    "sra      %[temp3],  %[temp16], 31                 \n\t"
511    "beqz     %[temp18], 13f                           \n\t"
512    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
513    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
514  "13:                                                 \n\t"
515    "beqz     %[temp17], 14f                           \n\t"
516    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
517    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
518  "14:                                                 \n\t"
519    "beqz     %[temp12], 15f                           \n\t"
520    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
521    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
522  "15:                                                 \n\t"
523    "beqz     %[temp9],  16f                           \n\t"
524    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
525    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
526  "16:                                                 \n\t"
527    "sb       %[temp5],  96(%[dst])                    \n\t"
528    "sb       %[temp8],  97(%[dst])                    \n\t"
529    "sb       %[temp11], 98(%[dst])                    \n\t"
530    "sb       %[temp16], 99(%[dst])                    \n\t"
531
532    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
533      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
534      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
535      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
536      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
537      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
538      [temp18]"=&r"(temp18)
539    : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
540    : "memory", "hi", "lo"
541  );
542}
543
// Transforms one block, or two adjacent ones when 'do_two' is non-zero. The
// second block reads its coefficients from in + 16 and writes 4 bytes to the
// right of the first (dst + 4).
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    TransformOne(in + 16 * n, dst + 4 * n);
  }
}
550
551#endif  // WEBP_USE_MIPS32
552
553//------------------------------------------------------------------------------
554// Entry point
555
extern void VP8DspInitMIPS32(void);

// Installs the MIPS32 implementations from this file into the decoder's
// function pointers. Compiles to an empty function when WEBP_USE_MIPS32 is
// not defined.
void VP8DspInitMIPS32(void) {
#if defined(WEBP_USE_MIPS32)
  VP8InitClipTables();

  // Inverse transform.
  VP8Transform = TransformTwo;

  // Regular in-loop filters: macroblock edges first, then inner edges.
  VP8VFilter16 = VFilter16;
  VP8VFilter8 = VFilter8;
  VP8HFilter16 = HFilter16;
  VP8HFilter8 = HFilter8;
  VP8VFilter16i = VFilter16i;
  VP8VFilter8i = VFilter8i;
  VP8HFilter16i = HFilter16i;
  VP8HFilter8i = HFilter8i;

  // Simple in-loop filters.
  VP8SimpleVFilter16 = SimpleVFilter16;
  VP8SimpleVFilter16i = SimpleVFilter16i;
  VP8SimpleHFilter16 = SimpleHFilter16;
  VP8SimpleHFilter16i = SimpleHFilter16i;
#endif  // WEBP_USE_MIPS32
}
579