// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of dsp functions
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

157c8da7ce66017295a65ec028084b90800be377f8James Zern#include "./dsp.h"
167c8da7ce66017295a65ec028084b90800be377f8James Zern
177c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_MIPS_DSP_R2)
187c8da7ce66017295a65ec028084b90800be377f8James Zern
197c8da7ce66017295a65ec028084b90800be377f8James Zern#include "./mips_macro.h"
207c8da7ce66017295a65ec028084b90800be377f8James Zern
217c8da7ce66017295a65ec028084b90800be377f8James Zernstatic const int kC1 = 20091 + (1 << 16);
227c8da7ce66017295a65ec028084b90800be377f8James Zernstatic const int kC2 = 35468;
237c8da7ce66017295a65ec028084b90800be377f8James Zern
247c8da7ce66017295a65ec028084b90800be377f8James Zern#define MUL(a, b) (((a) * (b)) >> 16)
257c8da7ce66017295a65ec028084b90800be377f8James Zern
267c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void TransformDC(const int16_t* in, uint8_t* dst) {
277c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
287c8da7ce66017295a65ec028084b90800be377f8James Zern
297c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
307c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, dst,
317c8da7ce66017295a65ec028084b90800be377f8James Zern                        0, 0, 0, 0,
327c8da7ce66017295a65ec028084b90800be377f8James Zern                        0, 1, 2, 3,
337c8da7ce66017295a65ec028084b90800be377f8James Zern                        BPS)
347c8da7ce66017295a65ec028084b90800be377f8James Zern    "lh               %[temp5],  0(%[in])               \n\t"
357c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu            %[temp5],  %[temp5],  4           \n\t"
367c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins              %[temp5],  %[temp5],  16, 16      \n\t"
377c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra.ph          %[temp5],  %[temp5],  3           \n\t"
387c8da7ce66017295a65ec028084b90800be377f8James Zern    CONVERT_2_BYTES_TO_HALF(temp6, temp7, temp8, temp9, temp10, temp1, temp2,
397c8da7ce66017295a65ec028084b90800be377f8James Zern                            temp3, temp1, temp2, temp3, temp4)
407c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_SAT_SUM_X2(temp6, temp7, temp8, temp9, temp10, temp1, temp2, temp3,
417c8da7ce66017295a65ec028084b90800be377f8James Zern                     temp5, temp5, temp5, temp5, temp5, temp5, temp5, temp5,
427c8da7ce66017295a65ec028084b90800be377f8James Zern                     dst, 0, 1, 2, 3, BPS)
437c8da7ce66017295a65ec028084b90800be377f8James Zern
447c8da7ce66017295a65ec028084b90800be377f8James Zern    OUTPUT_EARLY_CLOBBER_REGS_10()
457c8da7ce66017295a65ec028084b90800be377f8James Zern    : [in]"r"(in), [dst]"r"(dst)
467c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
477c8da7ce66017295a65ec028084b90800be377f8James Zern  );
487c8da7ce66017295a65ec028084b90800be377f8James Zern}
497c8da7ce66017295a65ec028084b90800be377f8James Zern
507c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void TransformAC3(const int16_t* in, uint8_t* dst) {
517c8da7ce66017295a65ec028084b90800be377f8James Zern  const int a = in[0] + 4;
527c8da7ce66017295a65ec028084b90800be377f8James Zern  int c4 = MUL(in[4], kC2);
537c8da7ce66017295a65ec028084b90800be377f8James Zern  const int d4 = MUL(in[4], kC1);
547c8da7ce66017295a65ec028084b90800be377f8James Zern  const int c1 = MUL(in[1], kC2);
557c8da7ce66017295a65ec028084b90800be377f8James Zern  const int d1 = MUL(in[1], kC1);
567c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
577c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
587c8da7ce66017295a65ec028084b90800be377f8James Zern
597c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
607c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins              %[c4],      %[d4],     16,       16    \n\t"
617c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.ph         %[temp1],   %[a]                       \n\t"
627c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.ph         %[temp4],   %[d1]                      \n\t"
637c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp2, temp3, temp1, c4)
647c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.ph         %[temp5],   %[c1]                      \n\t"
657c8da7ce66017295a65ec028084b90800be377f8James Zern    SHIFT_R_SUM_X2(temp1, temp6, temp7, temp8, temp2, temp9, temp10, temp4,
667c8da7ce66017295a65ec028084b90800be377f8James Zern                   temp2, temp2, temp3, temp3, temp4, temp5, temp4, temp5)
677c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_WITH_OFFSET_X4(temp3, temp5, temp11, temp12, dst,
687c8da7ce66017295a65ec028084b90800be377f8James Zern                        0, 0, 0, 0,
697c8da7ce66017295a65ec028084b90800be377f8James Zern                        0, 1, 2, 3,
707c8da7ce66017295a65ec028084b90800be377f8James Zern                        BPS)
717c8da7ce66017295a65ec028084b90800be377f8James Zern    CONVERT_2_BYTES_TO_HALF(temp13, temp14, temp3, temp15, temp5, temp16,
727c8da7ce66017295a65ec028084b90800be377f8James Zern                            temp11, temp17, temp3, temp5, temp11, temp12)
737c8da7ce66017295a65ec028084b90800be377f8James Zern    PACK_2_HALVES_TO_WORD(temp12, temp18, temp7, temp6, temp1, temp8, temp2,
747c8da7ce66017295a65ec028084b90800be377f8James Zern                          temp4, temp7, temp6, temp10, temp9)
757c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_SAT_SUM_X2(temp13, temp14, temp3, temp15, temp5, temp16, temp11,
767c8da7ce66017295a65ec028084b90800be377f8James Zern                     temp17, temp12, temp18, temp1, temp8, temp2, temp4,
777c8da7ce66017295a65ec028084b90800be377f8James Zern                     temp7, temp6, dst, 0, 1, 2, 3, BPS)
787c8da7ce66017295a65ec028084b90800be377f8James Zern
797c8da7ce66017295a65ec028084b90800be377f8James Zern    OUTPUT_EARLY_CLOBBER_REGS_18(),
807c8da7ce66017295a65ec028084b90800be377f8James Zern      [c4]"+&r"(c4)
817c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst), [a]"r"(a), [d1]"r"(d1), [d4]"r"(d4), [c1]"r"(c1)
827c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
837c8da7ce66017295a65ec028084b90800be377f8James Zern  );
847c8da7ce66017295a65ec028084b90800be377f8James Zern}
857c8da7ce66017295a65ec028084b90800be377f8James Zern
867c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void TransformOne(const int16_t* in, uint8_t* dst) {
877c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
887c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
897c8da7ce66017295a65ec028084b90800be377f8James Zern
907c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
917c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw              %[temp1],   0(%[in])                 \n\t"
927c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw              %[temp2],   16(%[in])                \n\t"
937c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_IN_X2(temp5, temp6, 24, 26)
947c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp3, temp4, temp1, temp2)
957c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_IN_X2(temp1, temp2, 8, 10)
967c8da7ce66017295a65ec028084b90800be377f8James Zern    MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
977c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
987c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp13, temp11, temp14, temp12)
997c8da7ce66017295a65ec028084b90800be377f8James Zern    INSERT_HALF_X2(temp8, temp7, temp10, temp9)
1007c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw              %[temp17],  4(%[in])                 \n\t"
1017c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw              %[temp18],  20(%[in])                \n\t"
1027c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
1037c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
1047c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
1057c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_IN_X2(temp17, temp18, 12, 14)
1067c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_IN_X2(temp9, temp10, 28, 30)
1077c8da7ce66017295a65ec028084b90800be377f8James Zern    MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
1087c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
1097c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp15, temp4, temp16, temp17)
1107c8da7ce66017295a65ec028084b90800be377f8James Zern    INSERT_HALF_X2(temp11, temp12, temp13, temp14)
1117c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
1127c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
1137c8da7ce66017295a65ec028084b90800be377f8James Zern
1147c8da7ce66017295a65ec028084b90800be377f8James Zern    // horizontal
1157c8da7ce66017295a65ec028084b90800be377f8James Zern    SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
1167c8da7ce66017295a65ec028084b90800be377f8James Zern    INSERT_HALF_X2(temp1, temp6, temp5, temp2)
1177c8da7ce66017295a65ec028084b90800be377f8James Zern    SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
1187c8da7ce66017295a65ec028084b90800be377f8James Zern    "repl.ph          %[temp2],   0x4                      \n\t"
1197c8da7ce66017295a65ec028084b90800be377f8James Zern    INSERT_HALF_X2(temp3, temp8, temp17, temp4)
1207c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph          %[temp1],   %[temp1],  %[temp2]      \n\t"
1217c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph          %[temp6],   %[temp6],  %[temp2]      \n\t"
1227c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
1237c8da7ce66017295a65ec028084b90800be377f8James Zern    ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
1247c8da7ce66017295a65ec028084b90800be377f8James Zern    MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
1257c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
1267c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp6, temp17, temp8, temp18)
1277c8da7ce66017295a65ec028084b90800be377f8James Zern    MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
1287c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
1297c8da7ce66017295a65ec028084b90800be377f8James Zern                  temp18, temp12, temp17, temp16)
1307c8da7ce66017295a65ec028084b90800be377f8James Zern    INSERT_HALF_X2(temp1, temp3, temp9, temp13)
1317c8da7ce66017295a65ec028084b90800be377f8James Zern    INSERT_HALF_X2(temp6, temp8, temp11, temp15)
1327c8da7ce66017295a65ec028084b90800be377f8James Zern    SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
1337c8da7ce66017295a65ec028084b90800be377f8James Zern                   temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
1347c8da7ce66017295a65ec028084b90800be377f8James Zern                   temp6)
1357c8da7ce66017295a65ec028084b90800be377f8James Zern    PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
1367c8da7ce66017295a65ec028084b90800be377f8James Zern                          temp16, temp11, temp10, temp15, temp14)
1377c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, dst,
1387c8da7ce66017295a65ec028084b90800be377f8James Zern                        0, 0, 0, 0,
1397c8da7ce66017295a65ec028084b90800be377f8James Zern                        0, 1, 2, 3,
1407c8da7ce66017295a65ec028084b90800be377f8James Zern                        BPS)
1417c8da7ce66017295a65ec028084b90800be377f8James Zern    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
1427c8da7ce66017295a65ec028084b90800be377f8James Zern                            temp11, temp10, temp11, temp14, temp15)
1437c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,
1447c8da7ce66017295a65ec028084b90800be377f8James Zern                     temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
1457c8da7ce66017295a65ec028084b90800be377f8James Zern                     dst, 0, 1, 2, 3, BPS)
1467c8da7ce66017295a65ec028084b90800be377f8James Zern
1477c8da7ce66017295a65ec028084b90800be377f8James Zern    OUTPUT_EARLY_CLOBBER_REGS_18()
1487c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2)
1497c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory", "hi", "lo"
1507c8da7ce66017295a65ec028084b90800be377f8James Zern  );
1517c8da7ce66017295a65ec028084b90800be377f8James Zern}
1527c8da7ce66017295a65ec028084b90800be377f8James Zern
// Runs TransformOne() on the block at 'in'/'dst' and, when 'do_two' is
// non-zero, on a second block located 16 coefficients further in 'in' and
// 4 bytes further in 'dst'.
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
  TransformOne(in, dst);
  if (do_two) {
    TransformOne(in + 16, dst + 4);
  }
}

1607c8da7ce66017295a65ec028084b90800be377f8James Zernstatic WEBP_INLINE void FilterLoop26(uint8_t* p,
1617c8da7ce66017295a65ec028084b90800be377f8James Zern                                     int hstride, int vstride, int size,
1627c8da7ce66017295a65ec028084b90800be377f8James Zern                                     int thresh, int ithresh, int hev_thresh) {
1637c8da7ce66017295a65ec028084b90800be377f8James Zern  const int thresh2 = 2 * thresh + 1;
1647c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
1657c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp10, temp11, temp12, temp13, temp14, temp15;
1667c8da7ce66017295a65ec028084b90800be377f8James Zern
1677c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
1687c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      push                                      \n\t"
1697c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      noreorder                                 \n\t"
1707c8da7ce66017295a65ec028084b90800be377f8James Zern  "1:                                                    \n\t"
1717c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[temp1],  %[hstride]                     \n\t"
1727c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[size],   %[size],        -1             \n\t"
1737c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp2],  %[hstride],     1              \n\t"
1747c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp3],  %[temp1],       1              \n\t"
1757c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp4],  %[temp2],       %[hstride]     \n\t"
1767c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp5],  %[temp3],       %[temp1]       \n\t"
1777c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbu       %[temp7],  0(%[p])                        \n\t"
1787c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp6],  %[temp3],       1              \n\t"
1797c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp8],  %[temp5](%[p])                 \n\t"
1807c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp9],  %[temp3](%[p])                 \n\t"
1817c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp10], %[temp1](%[p])                 \n\t"
1827c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp11], %[temp6](%[p])                 \n\t"
1837c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp12], %[hstride](%[p])               \n\t"
1847c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp13], %[temp2](%[p])                 \n\t"
1857c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp14], %[temp4](%[p])                 \n\t"
1867c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp1],  %[temp10],      %[temp7]       \n\t"
1877c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],  %[temp9],       %[temp12]      \n\t"
1887c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp3],  %[temp1]                       \n\t"
1897c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],  %[temp2]                       \n\t"
1907c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[temp1],  %[temp1]                       \n\t"
1917c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp3],  %[temp3],       2              \n\t"
1927c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp15], %[temp3],       %[temp4]       \n\t"
1937c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[temp15],      %[thresh2]     \n\t"
1947c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp6],  %[temp1],       1              \n\t"
1957c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp3],  3f                             \n\t"
1967c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp4],  %[temp11],      %[temp8]       \n\t"
1977c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],  %[temp4]                       \n\t"
1987c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp2],  %[temp2],       24             \n\t"
1997c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp4],  %[temp4],       %[ithresh]     \n\t"
2007c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp4],  3f                             \n\t"
2017c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp3],  %[temp8],       %[temp9]       \n\t"
2027c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp3],  %[temp3]                       \n\t"
2037c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
2047c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp3],  3f                             \n\t"
2057c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp5],  %[temp9],       %[temp10]      \n\t"
2067c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp3],  %[temp5]                       \n\t"
2077c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp5],  %[temp5]                       \n\t"
2087c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
2097c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp3],  3f                             \n\t"
2107c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp3],  %[temp14],      %[temp13]      \n\t"
2117c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp3],  %[temp3]                       \n\t"
2127c8da7ce66017295a65ec028084b90800be377f8James Zern    "slt       %[temp5],  %[hev_thresh],  %[temp5]       \n\t"
2137c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
2147c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp3],  3f                             \n\t"
2157c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp3],  %[temp13],      %[temp12]      \n\t"
2167c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp3],  %[temp3]                       \n\t"
2177c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp4],  %[temp2],       24             \n\t"
2187c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
2197c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp3],  3f                             \n\t"
2207c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp15], %[temp12],      %[temp7]       \n\t"
2217c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp3],  %[temp15]                      \n\t"
2227c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp15], %[temp15]                      \n\t"
2237c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
2247c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp3],  3f                             \n\t"
2257c8da7ce66017295a65ec028084b90800be377f8James Zern    " slt      %[temp15], %[hev_thresh],  %[temp15]      \n\t"
2267c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp3],  %[temp6],       %[temp1]       \n\t"
2277c8da7ce66017295a65ec028084b90800be377f8James Zern    "or        %[temp2],  %[temp5],       %[temp15]      \n\t"
2287c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp5],  %[temp4],       %[temp3]       \n\t"
2297c8da7ce66017295a65ec028084b90800be377f8James Zern    "beqz      %[temp2],  4f                             \n\t"
2307c8da7ce66017295a65ec028084b90800be377f8James Zern    " shra_r.w %[temp1],  %[temp5],       3              \n\t"
2317c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp2],  %[temp5],       3              \n\t"
2327c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp2],  %[temp2],       3              \n\t"
2337c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp1],  %[temp1],       27             \n\t"
2347c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp2],  %[temp2],       27             \n\t"
2357c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],  %[p],           %[hstride]     \n\t"
2367c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp1],  %[temp1],       27             \n\t"
2377c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp2],  %[temp2],       27             \n\t"
2387c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp1],  %[temp7],       %[temp1]       \n\t"
2397c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp2],  %[temp10],      %[temp2]       \n\t"
2407c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp2],  %[temp2](%[VP8kclip1])         \n\t"
2417c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp1],  %[temp1](%[VP8kclip1])         \n\t"
2427c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp2],  0(%[temp3])                    \n\t"
2437c8da7ce66017295a65ec028084b90800be377f8James Zern    "j         3f                                        \n\t"
2447c8da7ce66017295a65ec028084b90800be377f8James Zern    " sb       %[temp1],  0(%[p])                        \n\t"
2457c8da7ce66017295a65ec028084b90800be377f8James Zern  "4:                                                    \n\t"
2467c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp5],  %[temp5],       24             \n\t"
2477c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp14], %[p],           %[hstride]     \n\t"
2487c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp11], %[temp14],      %[hstride]     \n\t"
2497c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp6],  %[temp5],       24             \n\t"
2507c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp1],  %[temp6],       3              \n\t"
2517c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp15], %[temp11],      %[hstride]     \n\t"
2527c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp2],  %[temp6],       %[temp1]       \n\t"
2537c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp3],  %[temp2],       1              \n\t"
2547c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp4],  %[temp3],       %[temp2]       \n\t"
2557c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp2],  %[temp2],       63             \n\t"
2567c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp3],  %[temp3],       63             \n\t"
2577c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp4],  %[temp4],       63             \n\t"
2587c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp2],  %[temp2],       7              \n\t"
2597c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp3],  %[temp3],       7              \n\t"
2607c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp4],  %[temp4],       7              \n\t"
2617c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp1],  %[temp8],       %[temp2]       \n\t"
2627c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp5],  %[temp9],       %[temp3]       \n\t"
2637c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp6],  %[temp10],      %[temp4]       \n\t"
2647c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp8],  %[temp7],       %[temp4]       \n\t"
2657c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp7],  %[temp12],      %[temp3]       \n\t"
2667c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp10], %[p],           %[hstride]     \n\t"
2677c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp9],  %[temp13],      %[temp2]       \n\t"
2687c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp12], %[temp10],      %[hstride]     \n\t"
2697c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp2],  %[temp1](%[VP8kclip1])         \n\t"
2707c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp3],  %[temp5](%[VP8kclip1])         \n\t"
2717c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp4],  %[temp6](%[VP8kclip1])         \n\t"
2727c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp5],  %[temp8](%[VP8kclip1])         \n\t"
2737c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp6],  %[temp7](%[VP8kclip1])         \n\t"
2747c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp8],  %[temp9](%[VP8kclip1])         \n\t"
2757c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp2],  0(%[temp15])                   \n\t"
2767c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp3],  0(%[temp11])                   \n\t"
2777c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp4],  0(%[temp14])                   \n\t"
2787c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp5],  0(%[p])                        \n\t"
2797c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp6],  0(%[temp10])                   \n\t"
2807c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp8],  0(%[temp12])                   \n\t"
2817c8da7ce66017295a65ec028084b90800be377f8James Zern  "3:                                                    \n\t"
2827c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[size],   1b                             \n\t"
2837c8da7ce66017295a65ec028084b90800be377f8James Zern    " addu     %[p],      %[p],           %[vstride]     \n\t"
2847c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      pop                                       \n\t"
2857c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),[temp3]"=&r"(temp3),
2867c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
2877c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp7]"=&r"(temp7),[temp8]"=&r"(temp8),[temp9]"=&r"(temp9),
2887c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp10]"=&r"(temp10),[temp11]"=&r"(temp11),[temp12]"=&r"(temp12),
2897c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp13]"=&r"(temp13),[temp14]"=&r"(temp14),[temp15]"=&r"(temp15),
2907c8da7ce66017295a65ec028084b90800be377f8James Zern      [size]"+&r"(size), [p]"+&r"(p)
2917c8da7ce66017295a65ec028084b90800be377f8James Zern    : [hstride]"r"(hstride), [thresh2]"r"(thresh2),
2927c8da7ce66017295a65ec028084b90800be377f8James Zern      [ithresh]"r"(ithresh),[vstride]"r"(vstride), [hev_thresh]"r"(hev_thresh),
2937c8da7ce66017295a65ec028084b90800be377f8James Zern      [VP8kclip1]"r"(VP8kclip1)
2947c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
2957c8da7ce66017295a65ec028084b90800be377f8James Zern  );
2967c8da7ce66017295a65ec028084b90800be377f8James Zern}
2977c8da7ce66017295a65ec028084b90800be377f8James Zern
// Edge filter that rewrites at most four samples around each filtered position.
//
// Starting at 'p' and advancing by 'vstride' after every position, the loop
// reads the eight samples p[-4*hstride] .. p[3*hstride] (held in p3, p2, p1,
// p0, q0, q1, q2, q3) and leaves them untouched unless
//   4*|p0-q0| + |p1-q1| <= 2*thresh + 1
// and each of |p3-p2|, |p2-p1|, |p1-p0|, |q3-q2|, |q2-q1|, |q1-q0| is
// <= ithresh. When |p1-p0| or |q1-q0| exceeds hev_thresh only p0 and q0 are
// rewritten (label 1); otherwise p1, p0, q0 and q1 are rewritten.
// All written values are fetched through the VP8kclip1 table.
//
// NOTE: the entry check only skips the loop for size < 0, and the counter is
// tested after the first pass, so size == 0 still processes one position.
// The callers visible in this file pass 8 or 16.
2987c8da7ce66017295a65ec028084b90800be377f8James Zernstatic WEBP_INLINE void FilterLoop24(uint8_t* p,
2997c8da7ce66017295a65ec028084b90800be377f8James Zern                                     int hstride, int vstride, int size,
3007c8da7ce66017295a65ec028084b90800be377f8James Zern                                     int thresh, int ithresh, int hev_thresh) {
3017c8da7ce66017295a65ec028084b90800be377f8James Zern  int p0, q0, p1, q1, p2, q2, p3, q3;
3027c8da7ce66017295a65ec028084b90800be377f8James Zern  int step1, step2, temp1, temp2, temp3, temp4;
3037c8da7ce66017295a65ec028084b90800be377f8James Zern  uint8_t* pTemp0;
3047c8da7ce66017295a65ec028084b90800be377f8James Zern  uint8_t* pTemp1;
3057c8da7ce66017295a65ec028084b90800be377f8James Zern  const int thresh2 = 2 * thresh + 1;
3067c8da7ce66017295a65ec028084b90800be377f8James Zern
3077c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
3087c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      push                                   \n\t"
3097c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      noreorder                              \n\t"
    // noreorder: the instruction after every branch/jump below is its delay
    // slot and executes whether or not the branch is taken.
3107c8da7ce66017295a65ec028084b90800be377f8James Zern    "bltz      %[size],    3f                         \n\t"
3117c8da7ce66017295a65ec028084b90800be377f8James Zern    " nop                                             \n\t"
3127c8da7ce66017295a65ec028084b90800be377f8James Zern  "2:                                                 \n\t"
    // Load p0, q0, p1, q1; temp1 = p0 - q0, temp3 = p1 - q1.
    // Skip the position (label 0) when 4*|p0-q0| + |p1-q1| > thresh2.
3137c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[step1],   %[hstride]                 \n\t"
3147c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbu       %[q0],      0(%[p])                    \n\t"
3157c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[p0],      %[step1](%[p])             \n\t"
3167c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[step1],   %[step1],      %[hstride]  \n\t"
3177c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[q1],      %[hstride](%[p])           \n\t"
3187c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp1],   %[p0],         %[q0]       \n\t"
3197c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[p1],      %[step1](%[p])             \n\t"
3207c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[step2],   %[hstride],    %[hstride]  \n\t"
3217c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp2],   %[temp1]                   \n\t"
3227c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp3],   %[p1],         %[q1]       \n\t"
3237c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],   %[temp3]                   \n\t"
3247c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp2],   %[temp2],      2           \n\t"
3257c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp2],   %[temp2],      %[temp4]    \n\t"
3267c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp4],   %[temp2],      %[thresh2]  \n\t"
3277c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[step1],   %[step1],      %[hstride]  \n\t"
3287c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp4],   0f                         \n\t"
3297c8da7ce66017295a65ec028084b90800be377f8James Zern    " lbux     %[p2],      %[step1](%[p])             \n\t"
    // Load p3, q2, q3 and test every neighbouring difference against ithresh,
    // bailing out to label 0 on the first one that exceeds it. Interleaved
    // with the tests: temp1 becomes q0 - p0 and then 3 * (q0 - p0), pTemp0 is
    // set to p - hstride, and the p3/q3 registers are reused for |p1-p0| and
    // |q1-q0|.
3307c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[step1],   %[step1],      %[hstride]  \n\t"
3317c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[q2],      %[step2](%[p])             \n\t"
3327c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[p3],      %[step1](%[p])             \n\t"
3337c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp4],   %[p2],         %[p1]       \n\t"
3347c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[step2],   %[step2],      %[hstride]  \n\t"
3357c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],   %[p3],         %[p2]       \n\t"
3367c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],   %[temp4]                   \n\t"
3377c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp2],   %[temp2]                   \n\t"
3387c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[q3],      %[step2](%[p])             \n\t"
3397c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp4],   %[temp4],      %[ithresh]  \n\t"
3407c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[temp1],   %[temp1]                   \n\t"
3417c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp4],   0f                         \n\t"
3427c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp2],   %[temp2],      %[ithresh]  \n\t"
3437c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[p3],      %[p1],         %[p0]       \n\t"
3447c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp2],   0f                         \n\t"
3457c8da7ce66017295a65ec028084b90800be377f8James Zern    " absq_s.w %[p3],      %[p3]                      \n\t"
3467c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp4],   %[q3],         %[q2]       \n\t"
3477c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[pTemp0],  %[p],          %[hstride]  \n\t"
3487c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],   %[temp4]                   \n\t"
3497c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],   %[p3],         %[ithresh]  \n\t"
3507c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[step1],   %[temp1],      1           \n\t"
3517c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp2],   0f                         \n\t"
3527c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp4],   %[temp4],      %[ithresh]  \n\t"
3537c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],   %[q2],         %[q1]       \n\t"
3547c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp4],   0f                         \n\t"
3557c8da7ce66017295a65ec028084b90800be377f8James Zern    " absq_s.w %[temp2],   %[temp2]                   \n\t"
3567c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[q3],      %[q1],         %[q0]       \n\t"
3577c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[q3],      %[q3]                      \n\t"
3587c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],   %[temp2],      %[ithresh]  \n\t"
3597c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp1],   %[temp1],      %[step1]    \n\t"
3607c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp2],   0f                         \n\t"
3617c8da7ce66017295a65ec028084b90800be377f8James Zern    " subu     %[temp4],   %[q3],         %[ithresh]  \n\t"
    // High-variance test: branch to label 1 when
    // hev_thresh < |p1-p0| or hev_thresh < |q1-q0|.
3627c8da7ce66017295a65ec028084b90800be377f8James Zern    "slt       %[p3],      %[hev_thresh], %[p3]       \n\t"
3637c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp4],   0f                         \n\t"
3647c8da7ce66017295a65ec028084b90800be377f8James Zern    " slt      %[q3],      %[hev_thresh], %[q3]       \n\t"
3657c8da7ce66017295a65ec028084b90800be377f8James Zern    "or        %[q3],      %[q3],         %[p3]       \n\t"
3667c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[q3],      1f                         \n\t"
3677c8da7ce66017295a65ec028084b90800be377f8James Zern    " shra_r.w %[temp2],   %[temp1],      3           \n\t"
    // Four-sample path, with a = 3 * (q0 - p0):
    //   temp2 = rounding (a >> 3), temp1 = (a + 3) >> 3; the saturating shift
    //   left by 27 followed by sra 27 clamps each to [-16, 15];
    //   step1 = (temp2 + 1) >> 1.
    //   p0 += temp1, p1 += step1, q0 -= temp2, q1 -= step1, then each result
    //   is mapped through VP8kclip1 and stored at p - hstride, p - 2*hstride,
    //   p and p + hstride respectively.
3687c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp1],   %[temp1],      3           \n\t"
3697c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp1],   %[temp1],      3           \n\t"
3707c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp2],   %[temp2],      27          \n\t"
3717c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp1],   %[temp1],      27          \n\t"
3727c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[pTemp1],  %[p],          %[hstride]  \n\t"
3737c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp2],   %[temp2],      27          \n\t"
3747c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp1],   %[temp1],      27          \n\t"
3757c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[step1],   %[temp2],      1           \n\t"
3767c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[step1],   %[step1],      1           \n\t"
3777c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[p0],      %[p0],         %[temp1]    \n\t"
3787c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[p1],      %[p1],         %[step1]    \n\t"
3797c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[q0],      %[q0],         %[temp2]    \n\t"
3807c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[q1],      %[q1],         %[step1]    \n\t"
3817c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp2],   %[p0](%[VP8kclip1])        \n\t"
3827c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp3],   %[q0](%[VP8kclip1])        \n\t"
3837c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp4],   %[q1](%[VP8kclip1])        \n\t"
3847c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp2],   0(%[pTemp0])               \n\t"
3857c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp1],   %[p1](%[VP8kclip1])        \n\t"
3867c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[pTemp0],  %[pTemp0],    %[hstride]   \n\t"
3877c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp3],   0(%[p])                    \n\t"
3887c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp4],   0(%[pTemp1])               \n\t"
3897c8da7ce66017295a65ec028084b90800be377f8James Zern    "j         0f                                     \n\t"
3907c8da7ce66017295a65ec028084b90800be377f8James Zern    " sb       %[temp1],   0(%[pTemp0])               \n\t"
3917c8da7ce66017295a65ec028084b90800be377f8James Zern  "1:                                                 \n\t"
    // Two-sample path: temp3 (p1 - q1) is clamped to [-128, 127] by the
    // saturating shift left by 24 / sra 24 pair and added to 3 * (q0 - p0).
    // The two clamped corrections are derived as above, p0 += temp1 and
    // q0 -= temp2, and only p - hstride and p are rewritten.
3927c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp3],   %[temp3],      24          \n\t"
3937c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp3],   %[temp3],      24          \n\t"
3947c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp1],   %[temp1],      %[temp3]    \n\t"
3957c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w  %[temp2],   %[temp1],      3           \n\t"
3967c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp1],   %[temp1],      3           \n\t"
3977c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp2],   %[temp2],      27          \n\t"
3987c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp1],   %[temp1],      3           \n\t"
3997c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp1],   %[temp1],      27          \n\t"
4007c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp2],   %[temp2],      27          \n\t"
4017c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp1],   %[temp1],      27          \n\t"
4027c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[p0],      %[p0],         %[temp1]    \n\t"
4037c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[q0],      %[q0],         %[temp2]    \n\t"
4047c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp1],   %[p0](%[VP8kclip1])        \n\t"
4057c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp2],   %[q0](%[VP8kclip1])        \n\t"
4067c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp2],   0(%[p])                    \n\t"
4077c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp1],   0(%[pTemp0])               \n\t"
4087c8da7ce66017295a65ec028084b90800be377f8James Zern  "0:                                                 \n\t"
    // Loop tail: decrement the counter, repeat while it is still positive.
    // The pointer advance sits in the delay slot, so it always executes.
4097c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[size],    %[size],       1           \n\t"
4107c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[size],    2b                         \n\t"
4117c8da7ce66017295a65ec028084b90800be377f8James Zern    " addu     %[p],       %[p],          %[vstride]  \n\t"
4127c8da7ce66017295a65ec028084b90800be377f8James Zern  "3:                                                 \n\t"
4137c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      pop                                    \n\t"
4147c8da7ce66017295a65ec028084b90800be377f8James Zern    : [p0]"=&r"(p0), [q0]"=&r"(q0), [p1]"=&r"(p1), [q1]"=&r"(q1),
4157c8da7ce66017295a65ec028084b90800be377f8James Zern      [p2]"=&r"(p2), [q2]"=&r"(q2), [p3]"=&r"(p3), [q3]"=&r"(q3),
4167c8da7ce66017295a65ec028084b90800be377f8James Zern      [step2]"=&r"(step2), [step1]"=&r"(step1), [temp1]"=&r"(temp1),
4177c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
4187c8da7ce66017295a65ec028084b90800be377f8James Zern      [pTemp0]"=&r"(pTemp0), [pTemp1]"=&r"(pTemp1), [p]"+&r"(p),
4197c8da7ce66017295a65ec028084b90800be377f8James Zern      [size]"+&r"(size)
4207c8da7ce66017295a65ec028084b90800be377f8James Zern    : [vstride]"r"(vstride), [ithresh]"r"(ithresh),
4217c8da7ce66017295a65ec028084b90800be377f8James Zern      [hev_thresh]"r"(hev_thresh), [hstride]"r"(hstride),
4227c8da7ce66017295a65ec028084b90800be377f8James Zern      [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
4237c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
4247c8da7ce66017295a65ec028084b90800be377f8James Zern  );
4257c8da7ce66017295a65ec028084b90800be377f8James Zern}
4267c8da7ce66017295a65ec028084b90800be377f8James Zern
// Filtering on macroblock edges: hands 'p' to FilterLoop26 with 'stride' and
// 1 as the two step arguments and a count of 16.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int count = 16;
  FilterLoop26(p, stride, 1, count, thresh, ithresh, hev_thresh);
}
4327c8da7ce66017295a65ec028084b90800be377f8James Zern
// Same as the macroblock-edge call with step arguments swapped: FilterLoop26
// receives 1 and 'stride' as its two step arguments and a count of 16.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int count = 16;
  FilterLoop26(p, 1, stride, count, thresh, ithresh, hev_thresh);
}
4377c8da7ce66017295a65ec028084b90800be377f8James Zern
// 8-pixels wide variant, for chroma filtering: 'u' is processed first, then
// 'v', each through FilterLoop26 with step arguments (stride, 1).
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  const int count = 8;
  FilterLoop26(u, stride, 1, count, thresh, ithresh, hev_thresh);
  FilterLoop26(v, stride, 1, count, thresh, ithresh, hev_thresh);
}
4447c8da7ce66017295a65ec028084b90800be377f8James Zern
// 8-wide chroma variant with step arguments (1, stride): 'u' is processed
// first, then 'v', each through FilterLoop26.
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  const int count = 8;
  FilterLoop26(u, 1, stride, count, thresh, ithresh, hev_thresh);
  FilterLoop26(v, 1, stride, count, thresh, ithresh, hev_thresh);
}
4507c8da7ce66017295a65ec028084b90800be377f8James Zern
// Filtering on the three inner edges: 'p' is moved forward by 4 * stride
// before each of the three FilterLoop24 calls (hstride = stride, vstride = 1,
// 16 positions per call).
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  const int edge_step = 4 * stride;
  int edge;
  for (edge = 0; edge < 3; ++edge) {
    p += edge_step;
    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
4607c8da7ce66017295a65ec028084b90800be377f8James Zern
// Filtering on the three inner edges: 'p' is moved forward by 4 bytes before
// each of the three FilterLoop24 calls (hstride = 1, vstride = stride,
// 16 positions per call).
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int edge;
  for (edge = 0; edge < 3; ++edge) {
    p += 4;
    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
4697c8da7ce66017295a65ec028084b90800be377f8James Zern
// Runs FilterLoop24 once on 'u' and once on 'v', each offset by 4 * stride,
// with hstride = stride, vstride = 1 and 8 positions.
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int offset = 4 * stride;
  const int count = 8;
  FilterLoop24(u + offset, stride, 1, count, thresh, ithresh, hev_thresh);
  FilterLoop24(v + offset, stride, 1, count, thresh, ithresh, hev_thresh);
}
4757c8da7ce66017295a65ec028084b90800be377f8James Zern
// Runs FilterLoop24 once on 'u' and once on 'v', each offset by 4 bytes,
// with hstride = 1, vstride = stride and 8 positions.
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int offset = 4;
  const int count = 8;
  FilterLoop24(u + offset, 1, stride, count, thresh, ithresh, hev_thresh);
  FilterLoop24(v + offset, 1, stride, count, thresh, ithresh, hev_thresh);
}
4817c8da7ce66017295a65ec028084b90800be377f8James Zern
4827c8da7ce66017295a65ec028084b90800be377f8James Zern#undef MUL
4837c8da7ce66017295a65ec028084b90800be377f8James Zern
4847c8da7ce66017295a65ec028084b90800be377f8James Zern//------------------------------------------------------------------------------
4857c8da7ce66017295a65ec028084b90800be377f8James Zern// Simple In-loop filtering (Paragraph 15.2)
4867c8da7ce66017295a65ec028084b90800be377f8James Zern
// Simple filter applied to 16 consecutive byte positions starting at 'p'
// ('p' and 'p1' both advance by 1 per iteration).
//
// Per position the asm reads p[-2*stride], p[-stride], p[0] and p[stride]
// (temp0, temp1, temp2, temp3). If
//   4*|temp1 - temp2| + |temp0 - temp3| > 2*thresh + 1
// nothing is written. Otherwise, with
//   a = 3 * (temp2 - temp1) + clamp(temp0 - temp3, -128, 127),
// p[-stride] receives VP8kclip1[temp1 + clamp((a + 3) >> 3, -16, 15)] and
// p[0] receives VP8kclip1[temp2 - clamp(rounding (a >> 3), -16, 15)].
// The clamps are the shll_s.w / sra pairs (saturating shift left, then
// arithmetic shift right by the same amount).
4877c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
4887c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
4897c8da7ce66017295a65ec028084b90800be377f8James Zern  const int thresh2 = 2 * thresh + 1;
4907c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  // p1 tracks p - stride and is the store address for the sample before the
  // edge.
4917c8da7ce66017295a65ec028084b90800be377f8James Zern  uint8_t* p1 = p - stride;
4927c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
4937c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      push                                      \n\t"
4947c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      noreorder                                 \n\t"
4957c8da7ce66017295a65ec028084b90800be377f8James Zern    "li        %[i],        16                           \n\t"
4967c8da7ce66017295a65ec028084b90800be377f8James Zern  "0:                                                    \n\t"
    // Load the four samples and evaluate the threshold; temp7/temp6 hold the
    // signed differences, temp8 the negated first difference.
4977c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[temp4],    %[stride]                    \n\t"
4987c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp5],    %[temp4],       1            \n\t"
4997c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbu       %[temp2],    0(%[p])                      \n\t"
5007c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp3],    %[stride](%[p])              \n\t"
5017c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp1],    %[temp4](%[p])               \n\t"
5027c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp0],    %[temp5](%[p])               \n\t"
5037c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp7],    %[temp1],       %[temp2]     \n\t"
5047c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp6],    %[temp0],       %[temp3]     \n\t"
5057c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],    %[temp7]                     \n\t"
5067c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp5],    %[temp6]                     \n\t"
5077c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp4],    %[temp4],       2            \n\t"
5087c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp5],    %[temp5],       %[thresh2]   \n\t"
5097c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp5],    %[temp4],       %[temp5]     \n\t"
5107c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[temp8],    %[temp7]                     \n\t"
    // The counter decrement is in the delay slot, so it happens on both the
    // skip and the filter path.
5117c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp5],    1f                           \n\t"
5127c8da7ce66017295a65ec028084b90800be377f8James Zern    " addiu    %[i],        %[i],           -1           \n\t"
    // temp3 = a (see header); temp0 / temp4 = the two clamped corrections.
5137c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp4],    %[temp8],       1            \n\t"
5147c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp5],    %[temp6],       24           \n\t"
5157c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp3],    %[temp4],       %[temp8]     \n\t"
5167c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp5],    %[temp5],       24           \n\t"
5177c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp3],    %[temp3],       %[temp5]     \n\t"
5187c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp7],    %[temp3],       3            \n\t"
5197c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp7],    %[temp7],       3            \n\t"
5207c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w  %[temp8],    %[temp3],       3            \n\t"
5217c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp0],    %[temp7],       27           \n\t"
5227c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp4],    %[temp8],       27           \n\t"
5237c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp0],    %[temp0],       27           \n\t"
5247c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp4],    %[temp4],       27           \n\t"
5257c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp7],    %[temp1],       %[temp0]     \n\t"
5267c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],    %[temp2],       %[temp4]     \n\t"
5277c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp3],    %[temp7](%[VP8kclip1])       \n\t"
5287c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp4],    %[temp2](%[VP8kclip1])       \n\t"
5297c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp3],    0(%[p1])                     \n\t"
5307c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp4],    0(%[p])                      \n\t"
5317c8da7ce66017295a65ec028084b90800be377f8James Zern  "1:                                                    \n\t"
    // Advance both pointers by one byte and loop while i > 0.
5327c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[p1],       %[p1],          1            \n\t"
5337c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[i],        0b                           \n\t"
5347c8da7ce66017295a65ec028084b90800be377f8James Zern    " addiu    %[p],        %[p],           1            \n\t"
5357c8da7ce66017295a65ec028084b90800be377f8James Zern    " .set     pop                                       \n\t"
5367c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
5377c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
5387c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
5397c8da7ce66017295a65ec028084b90800be377f8James Zern      [p]"+&r"(p), [i]"=&r"(i), [p1]"+&r"(p1)
5407c8da7ce66017295a65ec028084b90800be377f8James Zern    : [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
5417c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
5427c8da7ce66017295a65ec028084b90800be377f8James Zern  );
5437c8da7ce66017295a65ec028084b90800be377f8James Zern}
5447c8da7ce66017295a65ec028084b90800be377f8James Zern
// Expands to the text of four "lbu" (load byte unsigned) instructions for use
// inside an inline-asm template. Operand names are stringized, so TEMPn and
// SRC must be the asm operand names, and the offsets are emitted as text:
// TEMP0 = SRC[A + A1 * BPS]
// TEMP1 = SRC[B + B1 * BPS]
// TEMP2 = SRC[C + C1 * BPS]
// TEMP3 = SRC[D + D1 * BPS]
// XSTR and BPS come from outside this section; XSTR(BPS) is expected to
// expand to the value of BPS as a string literal.
// The last line deliberately carries no continuation backslash, so the macro
// body does not depend on the following line being blank.
#define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3,                               \
                     A, A1, B, B1, C, C1, D, D1, SRC)                          \
  "lbu      %[" #TEMP0 "],   " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
  "lbu      %[" #TEMP1 "],   " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
  "lbu      %[" #TEMP2 "],   " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
  "lbu      %[" #TEMP3 "],   " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"
5557c8da7ce66017295a65ec028084b90800be377f8James Zern
// Simple filter applied to 16 positions, advancing 'p' by 'stride' after
// each one.
//
// Per position the asm reads p[-2], p[-1], p[0] and p[1] (temp0, temp1,
// temp2, temp3; the LOAD_4_BYTES row multipliers are all 0). If
//   4*|temp1 - temp2| + |temp0 - temp3| > 2*thresh + 1
// nothing is written. Otherwise, with
//   a = 3 * (temp2 - temp1) + clamp(temp0 - temp3, -128, 127),
// p[-1] receives VP8kclip1[temp1 + clamp((a + 3) >> 3, -16, 15)] and
// p[0] receives VP8kclip1[temp2 - clamp(rounding (a >> 3), -16, 15)].
// The clamps are the shll_s.w / sra pairs (saturating shift left, then
// arithmetic shift right by the same amount).
5567c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
5577c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
5587c8da7ce66017295a65ec028084b90800be377f8James Zern  const int thresh2 = 2 * thresh + 1;
5597c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
5607c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
5617c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      push                                     \n\t"
5627c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      noreorder                                \n\t"
5637c8da7ce66017295a65ec028084b90800be377f8James Zern    "li        %[i],       16                           \n\t"
5647c8da7ce66017295a65ec028084b90800be377f8James Zern  "0:                                                   \n\t"
    // Load the four samples and evaluate the threshold; temp7/temp6 hold the
    // signed differences, temp8 the negated first difference.
5657c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -2, 0, -1, 0, 0, 0, 1, 0, p)
5667c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp7],    %[temp1],       %[temp2]    \n\t"
5677c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp6],    %[temp0],       %[temp3]    \n\t"
5687c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp4],    %[temp7]                    \n\t"
5697c8da7ce66017295a65ec028084b90800be377f8James Zern    "absq_s.w  %[temp5],    %[temp6]                    \n\t"
5707c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp4],    %[temp4],       2           \n\t"
5717c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp5],    %[temp4],       %[temp5]    \n\t"
5727c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp5],    %[temp5],       %[thresh2]  \n\t"
5737c8da7ce66017295a65ec028084b90800be377f8James Zern    "negu      %[temp8],    %[temp7]                    \n\t"
    // The counter decrement is in the delay slot, so it happens on both the
    // skip and the filter path.
5747c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[temp5],    1f                          \n\t"
5757c8da7ce66017295a65ec028084b90800be377f8James Zern    " addiu    %[i],        %[i],           -1          \n\t"
    // temp3 = a (see header); temp0 / temp4 = the two clamped corrections.
5767c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll       %[temp4],    %[temp8],       1           \n\t"
5777c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp5],    %[temp6],       24          \n\t"
5787c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp3],    %[temp4],       %[temp8]    \n\t"
5797c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp5],    %[temp5],       24          \n\t"
5807c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp3],    %[temp3],       %[temp5]    \n\t"
5817c8da7ce66017295a65ec028084b90800be377f8James Zern    "addiu     %[temp7],    %[temp3],       3           \n\t"
5827c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp7],    %[temp7],       3           \n\t"
5837c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w  %[temp8],    %[temp3],       3           \n\t"
5847c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp0],    %[temp7],       27          \n\t"
5857c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll_s.w  %[temp4],    %[temp8],       27          \n\t"
5867c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp0],    %[temp0],       27          \n\t"
5877c8da7ce66017295a65ec028084b90800be377f8James Zern    "sra       %[temp4],    %[temp4],       27          \n\t"
5887c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu      %[temp7],    %[temp1],       %[temp0]    \n\t"
5897c8da7ce66017295a65ec028084b90800be377f8James Zern    "subu      %[temp2],    %[temp2],       %[temp4]    \n\t"
5907c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp3],    %[temp7](%[VP8kclip1])      \n\t"
5917c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbux      %[temp4],    %[temp2](%[VP8kclip1])      \n\t"
5927c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp3],    -1(%[p])                    \n\t"
5937c8da7ce66017295a65ec028084b90800be377f8James Zern    "sb        %[temp4],    0(%[p])                     \n\t"
5947c8da7ce66017295a65ec028084b90800be377f8James Zern  "1:                                                   \n\t"
    // Loop while i > 0; the pointer advance by 'stride' is in the delay slot.
5957c8da7ce66017295a65ec028084b90800be377f8James Zern    "bgtz      %[i],        0b                          \n\t"
5967c8da7ce66017295a65ec028084b90800be377f8James Zern    " addu     %[p],        %[p],           %[stride]   \n\t"
5977c8da7ce66017295a65ec028084b90800be377f8James Zern    ".set      pop                                      \n\t"
5987c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
5997c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
6007c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
6017c8da7ce66017295a65ec028084b90800be377f8James Zern      [p]"+&r"(p), [i]"=&r"(i)
6027c8da7ce66017295a65ec028084b90800be377f8James Zern    : [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
6037c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
6047c8da7ce66017295a65ec028084b90800be377f8James Zern  );
6057c8da7ce66017295a65ec028084b90800be377f8James Zern}
6067c8da7ce66017295a65ec028084b90800be377f8James Zern
// Applies SimpleVFilter16 three times, moving 'p' forward by 4 * stride
// before each call.
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  const int edge_step = 4 * stride;
  int edge;
  for (edge = 0; edge < 3; ++edge) {
    p += edge_step;
    SimpleVFilter16(p, stride, thresh);
  }
}
6147c8da7ce66017295a65ec028084b90800be377f8James Zern
// Calls SimpleHFilter16() three times, moving 'p' right by four bytes before
// each call.
// NOTE(review): presumably these are the three inner vertical edges of a
// 16x16 block -- confirm against the caller.
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    SimpleHFilter16(p, stride, thresh);
  }
}
6227c8da7ce66017295a65ec028084b90800be377f8James Zern
// Expands to two unaligned 32-bit stores ("usw") for use inside an inline-asm
// template:
//   DST[A * BPS]     = TEMP0
//   DST[B + C * BPS] = TEMP1
// TEMP0, TEMP1 and DST are the names of asm operands (they are stringized into
// "%[name]"); A, B and C are integer literals pasted verbatim into the address
// expression. XSTR() and BPS must be defined where the macro is expanded.
#define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST)                              \
  "usw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #DST "])         \n\t"     \
  "usw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #DST "])  \n\t"
6287c8da7ce66017295a65ec028084b90800be377f8James Zern
6297c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void VE4(uint8_t* dst) {    // vertical
6307c8da7ce66017295a65ec028084b90800be377f8James Zern  const uint8_t* top = dst - BPS;
6317c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
6327c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
6337c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw             %[temp0],   -1(%[top])              \n\t"
6347c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulh             %[temp1],   3(%[top])               \n\t"
6357c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
6367c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"
6377c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbr   %[temp4],   %[temp1]                \n\t"
6387c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t"
6397c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph       %[temp6],   %[temp4],    %[temp3]   \n\t"
6407c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph         %[temp5],   %[temp5],    1          \n\t"
6417c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph         %[temp6],   %[temp6],    1          \n\t"
6427c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp2],   %[temp5],    %[temp2]   \n\t"
6437c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp6],   %[temp6],    %[temp4]   \n\t"
6447c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp2],   %[temp2],    %[temp3]   \n\t"
6457c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t"
6467c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"
6477c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
6487c8da7ce66017295a65ec028084b90800be377f8James Zern    "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t"
6497c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)
6507c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp4, temp4, 2, 0, 3, dst)
6517c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
6527c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
6537c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6)
6547c8da7ce66017295a65ec028084b90800be377f8James Zern    : [top]"r"(top), [dst]"r"(dst)
6557c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
6567c8da7ce66017295a65ec028084b90800be377f8James Zern  );
6577c8da7ce66017295a65ec028084b90800be377f8James Zern}
6587c8da7ce66017295a65ec028084b90800be377f8James Zern
6597c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void DC4(uint8_t* dst) {   // DC
6607c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4;
6617c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
6627c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw          %[temp0],   -1*" XSTR(BPS) "(%[dst]) \n\t"
6637c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)
6647c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins          %[temp1],   %[temp2],    8,     8    \n\t"
6657c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins          %[temp1],   %[temp3],    16,    8    \n\t"
6667c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins          %[temp1],   %[temp4],    24,    8    \n\t"
6677c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb   %[temp0],   %[temp0]                 \n\t"
6687c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb   %[temp1],   %[temp1]                 \n\t"
6697c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp0],   %[temp0],    %[temp1]    \n\t"
6707c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w     %[temp0],   %[temp0],    3           \n\t"
6717c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.qb     %[temp0],   %[temp0]                 \n\t"
6727c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)
6737c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 2, 0, 3, dst)
6747c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
6757c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
6767c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst)
6777c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
6787c8da7ce66017295a65ec028084b90800be377f8James Zern  );
6797c8da7ce66017295a65ec028084b90800be377f8James Zern}
6807c8da7ce66017295a65ec028084b90800be377f8James Zern
6817c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void RD4(uint8_t* dst) {   // Down-right
6827c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4;
6837c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp5, temp6, temp7, temp8;
6847c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
6857c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)
6867c8da7ce66017295a65ec028084b90800be377f8James Zern    "ulw            %[temp7],   -1-" XSTR(BPS) "(%[dst])       \n\t"
6877c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins            %[temp1],   %[temp0], 16, 16               \n\t"
6887c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbr  %[temp5],   %[temp7]                       \n\t"
6897c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins            %[temp2],   %[temp1], 16, 16               \n\t"
6907c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbl  %[temp4],   %[temp7]                       \n\t"
6917c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins            %[temp3],   %[temp2], 16, 16               \n\t"
6927c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph        %[temp2],   %[temp2], 1                    \n\t"
6937c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph        %[temp3],   %[temp3], %[temp1]             \n\t"
6947c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph      %[temp6],   %[temp5], %[temp1]             \n\t"
6957c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph        %[temp3],   %[temp3], %[temp2]             \n\t"
6967c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph        %[temp1],   %[temp1], %[temp5]             \n\t"
6977c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph        %[temp6],   %[temp6], 1                    \n\t"
6987c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph        %[temp1],   %[temp1], %[temp6]             \n\t"
6997c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph      %[temp0],   %[temp4], %[temp5]             \n\t"
7007c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph        %[temp8],   %[temp5], %[temp4]             \n\t"
7017c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph      %[temp3],   %[temp3], 2                    \n\t"
7027c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph        %[temp0],   %[temp0], 1                    \n\t"
7037c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph      %[temp1],   %[temp1], 2                    \n\t"
7047c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph        %[temp8],   %[temp0], %[temp8]             \n\t"
7057c8da7ce66017295a65ec028084b90800be377f8James Zern    "lbu            %[temp5],   3-" XSTR(BPS) "(%[dst])        \n\t"
7067c8da7ce66017295a65ec028084b90800be377f8James Zern    "precrq.ph.w    %[temp7],   %[temp7], %[temp7]             \n\t"
7077c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph      %[temp8],   %[temp8], 2                    \n\t"
7087c8da7ce66017295a65ec028084b90800be377f8James Zern    "ins            %[temp7],   %[temp5], 0,  8                \n\t"
7097c8da7ce66017295a65ec028084b90800be377f8James Zern    "precr.qb.ph    %[temp2],   %[temp1], %[temp3]             \n\t"
7107c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb     %[temp4],   %[temp7]                       \n\t"
7117c8da7ce66017295a65ec028084b90800be377f8James Zern    "precr.qb.ph    %[temp6],   %[temp8], %[temp1]             \n\t"
7127c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w       %[temp4],   %[temp4], 2                    \n\t"
7137c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp2, temp6, 3, 0, 1, dst)
7147c8da7ce66017295a65ec028084b90800be377f8James Zern    "prepend        %[temp2],   %[temp8], 8                    \n\t"
7157c8da7ce66017295a65ec028084b90800be377f8James Zern    "prepend        %[temp6],   %[temp4], 8                    \n\t"
7167c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp2, temp6, 2, 0, 0, dst)
7177c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
7187c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
7197c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
7207c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst)
7217c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
7227c8da7ce66017295a65ec028084b90800be377f8James Zern  );
7237c8da7ce66017295a65ec028084b90800be377f8James Zern}
7247c8da7ce66017295a65ec028084b90800be377f8James Zern
// Expands to two unaligned 32-bit loads ("ulw") for use inside an inline-asm
// template:
//   TEMP0 = SRC[A * BPS]
//   TEMP1 = SRC[B + C * BPS]
// TEMP0, TEMP1 and SRC are the names of asm operands (they are stringized into
// "%[name]"); A, B and C are integer literals pasted verbatim into the address
// expression. XSTR() and BPS must be defined where the macro is expanded.
#define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC)                               \
  "ulw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #SRC "])         \n\t"     \
  "ulw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "])  \n\t"
7307c8da7ce66017295a65ec028084b90800be377f8James Zern
7317c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void LD4(uint8_t* dst) {   // Down-Left
7327c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4;
7337c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp5, temp6, temp7, temp8, temp9;
7347c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
7357c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
7367c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbl   %[temp2],    %[temp0]                     \n\t"
7377c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbr   %[temp3],    %[temp0]                     \n\t"
7387c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbr   %[temp4],    %[temp1]                     \n\t"
7397c8da7ce66017295a65ec028084b90800be377f8James Zern    "preceu.ph.qbl   %[temp5],    %[temp1]                     \n\t"
7407c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph       %[temp6],    %[temp2],    %[temp3]        \n\t"
7417c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph       %[temp7],    %[temp4],    %[temp2]        \n\t"
7427c8da7ce66017295a65ec028084b90800be377f8James Zern    "packrl.ph       %[temp8],    %[temp5],    %[temp4]        \n\t"
7437c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph         %[temp6],    %[temp6],    1               \n\t"
7447c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp9],    %[temp2],    %[temp6]        \n\t"
7457c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph         %[temp7],    %[temp7],    1               \n\t"
7467c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp9],    %[temp9],    %[temp3]        \n\t"
7477c8da7ce66017295a65ec028084b90800be377f8James Zern    "shll.ph         %[temp8],    %[temp8],    1               \n\t"
7487c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph       %[temp9],    %[temp9],    2               \n\t"
7497c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp3],    %[temp4],    %[temp7]        \n\t"
7507c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp0],    %[temp5],    %[temp8]        \n\t"
7517c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp3],    %[temp3],    %[temp2]        \n\t"
7527c8da7ce66017295a65ec028084b90800be377f8James Zern    "addq.ph         %[temp0],    %[temp0],    %[temp4]        \n\t"
7537c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph       %[temp3],    %[temp3],    2               \n\t"
7547c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.ph       %[temp0],    %[temp0],    2               \n\t"
7557c8da7ce66017295a65ec028084b90800be377f8James Zern    "srl             %[temp1],    %[temp1],    24              \n\t"
7567c8da7ce66017295a65ec028084b90800be377f8James Zern    "sll             %[temp1],    %[temp1],    1               \n\t"
7577c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb      %[temp5],    %[temp5]                     \n\t"
7587c8da7ce66017295a65ec028084b90800be377f8James Zern    "precr.qb.ph     %[temp9],    %[temp3],    %[temp9]        \n\t"
7597c8da7ce66017295a65ec028084b90800be377f8James Zern    "precr.qb.ph     %[temp3],    %[temp0],    %[temp3]        \n\t"
7607c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu            %[temp1],    %[temp1],    %[temp5]        \n\t"
7617c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w        %[temp1],    %[temp1],    2               \n\t"
7627c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp9, temp3, 0, 0, 2, dst)
7637c8da7ce66017295a65ec028084b90800be377f8James Zern    "prepend         %[temp9],    %[temp0],    8               \n\t"
7647c8da7ce66017295a65ec028084b90800be377f8James Zern    "prepend         %[temp3],    %[temp1],    8               \n\t"
7657c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp9, temp3, 1, 0, 3, dst)
7667c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
7677c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
7687c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
7697c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp9]"=&r"(temp9)
7707c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst)
7717c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
7727c8da7ce66017295a65ec028084b90800be377f8James Zern  );
7737c8da7ce66017295a65ec028084b90800be377f8James Zern}
7747c8da7ce66017295a65ec028084b90800be377f8James Zern
//------------------------------------------------------------------------------
// Chroma
7777c8da7ce66017295a65ec028084b90800be377f8James Zern
7787c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void DC8uv(uint8_t* dst) {     // DC
7797c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4;
7807c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp5, temp6, temp7, temp8, temp9;
7817c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
7827c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
7837c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
7847c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp6, temp7, temp8, temp9, -1, 4, -1, 5, -1, 6, -1, 7, dst)
7857c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"
7867c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"
7877c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"
7887c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp4],   %[temp4],    %[temp5]      \n\t"
7897c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp6],   %[temp6],    %[temp7]      \n\t"
7907c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp8],   %[temp8],    %[temp9]      \n\t"
7917c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp0],   %[temp0],    %[temp1]      \n\t"
7927c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp2],   %[temp2],    %[temp4]      \n\t"
7937c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp6],   %[temp6],    %[temp8]      \n\t"
7947c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp0],   %[temp0],    %[temp2]      \n\t"
7957c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp0],   %[temp0],    %[temp6]      \n\t"
7967c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w     %[temp0],   %[temp0],    4             \n\t"
7977c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.qb     %[temp0],   %[temp0]                   \n\t"
7987c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
7997c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
8007c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
8017c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
8027c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
8037c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
8047c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
8057c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
8067c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
8077c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
8087c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
8097c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp9]"=&r"(temp9)
8107c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst)
8117c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
8127c8da7ce66017295a65ec028084b90800be377f8James Zern  );
8137c8da7ce66017295a65ec028084b90800be377f8James Zern}
8147c8da7ce66017295a65ec028084b90800be377f8James Zern
8157c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
8167c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1;
8177c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
8187c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
8197c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"
8207c8da7ce66017295a65ec028084b90800be377f8James Zern    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"
8217c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp0],   %[temp0],    %[temp1]      \n\t"
8227c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w     %[temp0],   %[temp0],    3             \n\t"
8237c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.qb     %[temp0],   %[temp0]                   \n\t"
8247c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
8257c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
8267c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
8277c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
8287c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
8297c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
8307c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
8317c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
8327c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
8337c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst)
8347c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
8357c8da7ce66017295a65ec028084b90800be377f8James Zern  );
8367c8da7ce66017295a65ec028084b90800be377f8James Zern}
8377c8da7ce66017295a65ec028084b90800be377f8James Zern
8387c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
8397c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4;
8407c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp5, temp6, temp7, temp8;
8417c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
8427c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
8437c8da7ce66017295a65ec028084b90800be377f8James Zern    LOAD_4_BYTES(temp6, temp7, temp8, temp1, -1, 4, -1, 5, -1, 6, -1, 7, dst)
8447c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"
8457c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp4],   %[temp4],    %[temp5]      \n\t"
8467c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp6],   %[temp6],    %[temp7]      \n\t"
8477c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp8],   %[temp8],    %[temp1]      \n\t"
8487c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp2],   %[temp2],    %[temp4]      \n\t"
8497c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp6],   %[temp6],    %[temp8]      \n\t"
8507c8da7ce66017295a65ec028084b90800be377f8James Zern    "addu         %[temp0],   %[temp6],    %[temp2]      \n\t"
8517c8da7ce66017295a65ec028084b90800be377f8James Zern    "shra_r.w     %[temp0],   %[temp0],    3             \n\t"
8527c8da7ce66017295a65ec028084b90800be377f8James Zern    "replv.qb     %[temp0],   %[temp0]                   \n\t"
8537c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
8547c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
8557c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
8567c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
8577c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
8587c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
8597c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
8607c8da7ce66017295a65ec028084b90800be377f8James Zern    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
8617c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
8627c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
8637c8da7ce66017295a65ec028084b90800be377f8James Zern      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
8647c8da7ce66017295a65ec028084b90800be377f8James Zern    : [dst]"r"(dst)
8657c8da7ce66017295a65ec028084b90800be377f8James Zern    : "memory"
8667c8da7ce66017295a65ec028084b90800be377f8James Zern  );
8677c8da7ce66017295a65ec028084b90800be377f8James Zern}
8687c8da7ce66017295a65ec028084b90800be377f8James Zern
// The load/store asm-template helpers are not used past this point.
#undef LOAD_8_BYTES
#undef STORE_8_BYTES
#undef LOAD_4_BYTES
8727c8da7ce66017295a65ec028084b90800be377f8James Zern
// Asm-template fragment that adds a per-lane offset to packed bytes and clamps
// the results to unsigned bytes.
// Operands expected in the enclosing asm statement:
//   temp0 (and temp1 when SIZE == 8): four packed bytes each, replaced in
//       place by the clamped results;
//   temp2, temp3: scratch registers;
//   dst_1: the 16-bit offset, present in both halfword lanes.
// Steps: zero-extend the bytes to 16-bit lanes (preceu.ph.*), add dst_1
// (addu.ph), saturating shift left by 7 (shll_s.ph), then a saturating pack
// back to unsigned bytes (precrqu_s.qb.ph).
// SIZE is tested by the assembler (".if"): the temp1/temp3 instructions are
// only assembled when SIZE == 8.
#define CLIPPING(SIZE)                                                         \
  "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t"                 \
  "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t"                 \
".if " #SIZE " == 8                                      \n\t"                 \
  "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t"                 \
  "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t"                 \
".endif                                                  \n\t"                 \
  "addu.ph         %[temp2],   %[temp2],   %[dst_1]      \n\t"                 \
  "addu.ph         %[temp0],   %[temp0],   %[dst_1]      \n\t"                 \
".if " #SIZE " == 8                                      \n\t"                 \
  "addu.ph         %[temp3],   %[temp3],   %[dst_1]      \n\t"                 \
  "addu.ph         %[temp1],   %[temp1],   %[dst_1]      \n\t"                 \
".endif                                                  \n\t"                 \
  "shll_s.ph       %[temp2],   %[temp2],   7             \n\t"                 \
  "shll_s.ph       %[temp0],   %[temp0],   7             \n\t"                 \
".if " #SIZE " == 8                                      \n\t"                 \
  "shll_s.ph       %[temp3],   %[temp3],   7             \n\t"                 \
  "shll_s.ph       %[temp1],   %[temp1],   7             \n\t"                 \
".endif                                                  \n\t"                 \
  "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t"                 \
".if " #SIZE " == 8                                      \n\t"                 \
  "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"                 \
".endif                                                  \n\t"
8967c8da7ce66017295a65ec028084b90800be377f8James Zern
8977c8da7ce66017295a65ec028084b90800be377f8James Zern
// Produces one row of SIZE (4, 8 or 16) bytes at DST from the row at TOP:
// each TOP byte has (DST[-1] - top_1) added to it and is clamped to an
// unsigned byte by CLIPPING().
//   DST  : destination row; DST[-1] is read to build the offset.
//   TOP  : source row, read with unaligned word loads.
//   SIZE : literal 4, 8 or 16, selected at assembly time with ".if".
// 'top_1' is not a macro parameter: a variable of that name must be in scope
// at the expansion site, holding the value to subtract in both 16-bit lanes
// (dst_1 is built the same way from DST[-1]).
// dst_1 is adjusted once by "subu.ph" and then reused by every CLIPPING()
// expansion, including the second half of the SIZE == 16 case.
#define CLIP_8B_TO_DST(DST, TOP, SIZE) do {                                    \
  int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1];                              \
  int temp0, temp1, temp2, temp3;                                              \
  __asm__ volatile (                                                           \
  ".if " #SIZE " < 8                                     \n\t"                 \
    "ulw             %[temp0],   0(%[top])               \n\t"                 \
    "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"                 \
    CLIPPING(4)                                                                \
    "usw             %[temp0],   0(%[dst])               \n\t"                 \
  ".else                                                 \n\t"                 \
    "ulw             %[temp0],   0(%[top])               \n\t"                 \
    "ulw             %[temp1],   4(%[top])               \n\t"                 \
    "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"                 \
    CLIPPING(8)                                                                \
    "usw             %[temp0],   0(%[dst])               \n\t"                 \
    "usw             %[temp1],   4(%[dst])               \n\t"                 \
  ".if " #SIZE " == 16                                   \n\t"                 \
    "ulw             %[temp0],   8(%[top])               \n\t"                 \
    "ulw             %[temp1],   12(%[top])              \n\t"                 \
    CLIPPING(8)                                                                \
    "usw             %[temp0],   8(%[dst])               \n\t"                 \
    "usw             %[temp1],   12(%[dst])              \n\t"                 \
  ".endif                                                \n\t"                 \
  ".endif                                                \n\t"                 \
    : [dst_1]"+&r"(dst_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),           \
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)                                 \
    : [top_1]"r"(top_1), [top]"r"((TOP)), [dst]"r"((DST))                      \
    : "memory"                                                                 \
  );                                                                           \
} while (0)
9287c8da7ce66017295a65ec028084b90800be377f8James Zern
// Runs CLIP_8B_TO_DST once per row for SIZE rows, starting at DST and stepping
// DST forward by BPS after each row (DST must therefore be a modifiable
// lvalue; it ends up SIZE * BPS past its initial value).
// 'top' is the row located BPS bytes before the initial DST and is passed
// unchanged for every row. 'top_1' holds the byte at top[-1] duplicated into
// both 16-bit halves of an int; it is not passed as an argument because
// CLIP_8B_TO_DST picks it up by name from this scope.
// The third argument is deliberately forwarded as '(SIZE)': CLIP_8B_TO_DST
// stringifies it into assembler directives.
#define CLIP_TO_DST(DST, SIZE) do {                                            \
  const uint8_t* top = (DST) - BPS;                                            \
  const int top_1 = ((int)top[-1] << 16) + top[-1];                            \
  int rows_left = (SIZE);                                                      \
  while (rows_left-- > 0) {                                                    \
    CLIP_8B_TO_DST((DST), top, (SIZE));                                        \
    (DST) += BPS;                                                              \
  }                                                                            \
} while (0)
9387c8da7ce66017295a65ec028084b90800be377f8James Zern
// Generates 'static void TrueMotion<N>(uint8_t* PARAM)': a function whose whole
// body is CLIP_TO_DST applied to its pointer parameter with size N.
// PARAM is the name given to the generated function's parameter; N is pasted
// into the function name and forwarded (parenthesized) as the size.
#define TRUE_MOTION(PARAM, N)                                                  \
static void TrueMotion##N(uint8_t* (PARAM)) {                                  \
  CLIP_TO_DST((PARAM), (N));                                                   \
}
9437c8da7ce66017295a65ec028084b90800be377f8James Zern
9447c8da7ce66017295a65ec028084b90800be377f8James ZernTRUE_MOTION(dst, 4)
9457c8da7ce66017295a65ec028084b90800be377f8James ZernTRUE_MOTION(dst, 8)
9467c8da7ce66017295a65ec028084b90800be377f8James ZernTRUE_MOTION(dst, 16)
9477c8da7ce66017295a65ec028084b90800be377f8James Zern
9487c8da7ce66017295a65ec028084b90800be377f8James Zern#undef TRUE_MOTION
9497c8da7ce66017295a65ec028084b90800be377f8James Zern#undef CLIP_TO_DST
9507c8da7ce66017295a65ec028084b90800be377f8James Zern#undef CLIP_8B_TO_DST
9517c8da7ce66017295a65ec028084b90800be377f8James Zern#undef CLIPPING
9527c8da7ce66017295a65ec028084b90800be377f8James Zern
//------------------------------------------------------------------------------
// Entry point
9557c8da7ce66017295a65ec028084b90800be377f8James Zern
9567c8da7ce66017295a65ec028084b90800be377f8James Zernextern void VP8DspInitMIPSdspR2(void);
9577c8da7ce66017295a65ec028084b90800be377f8James Zern
9587c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPSdspR2(void) {
9597c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8TransformDC = TransformDC;
9607c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8TransformAC3 = TransformAC3;
9617c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8Transform = TransformTwo;
9627c8da7ce66017295a65ec028084b90800be377f8James Zern
9637c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8VFilter16 = VFilter16;
9647c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8HFilter16 = HFilter16;
9657c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8VFilter8 = VFilter8;
9667c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8HFilter8 = HFilter8;
9677c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8VFilter16i = VFilter16i;
9687c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8HFilter16i = HFilter16i;
9697c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8VFilter8i = VFilter8i;
9707c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8HFilter8i = HFilter8i;
9717c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8SimpleVFilter16 = SimpleVFilter16;
9727c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8SimpleHFilter16 = SimpleHFilter16;
9737c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8SimpleVFilter16i = SimpleVFilter16i;
9747c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8SimpleHFilter16i = SimpleHFilter16i;
9757c8da7ce66017295a65ec028084b90800be377f8James Zern
9767c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredLuma4[0] = DC4;
9777c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredLuma4[1] = TrueMotion4;
9787c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredLuma4[2] = VE4;
9797c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredLuma4[4] = RD4;
9807c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredLuma4[6] = LD4;
9817c8da7ce66017295a65ec028084b90800be377f8James Zern
9827c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredChroma8[0] = DC8uv;
9837c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredChroma8[1] = TrueMotion8;
9847c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredChroma8[4] = DC8uvNoTop;
9857c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredChroma8[5] = DC8uvNoLeft;
9867c8da7ce66017295a65ec028084b90800be377f8James Zern
9877c8da7ce66017295a65ec028084b90800be377f8James Zern  VP8PredLuma16[1] = TrueMotion16;
9887c8da7ce66017295a65ec028084b90800be377f8James Zern}
9897c8da7ce66017295a65ec028084b90800be377f8James Zern
9907c8da7ce66017295a65ec028084b90800be377f8James Zern#else  // !WEBP_USE_MIPS_DSP_R2
9917c8da7ce66017295a65ec028084b90800be377f8James Zern
// Taken when WEBP_USE_MIPS_DSP_R2 is not defined. WEBP_DSP_INIT_STUB is defined
// outside this file; presumably it emits a do-nothing VP8DspInitMIPSdspR2() so
// the symbol still exists in such builds -- TODO confirm against its
// definition.
WEBP_DSP_INIT_STUB(VP8DspInitMIPSdspR2)
9937c8da7ce66017295a65ec028084b90800be377f8James Zern
9947c8da7ce66017295a65ec028084b90800be377f8James Zern#endif  // WEBP_USE_MIPS_DSP_R2
995