// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include <algorithm>
690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "skia/ext/convolver.h"
790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "skia/ext/convolver_mips_dspr2.h"
890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "third_party/skia/include/core/SkTypes.h"
990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
1090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)namespace skia {
1190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)// Convolves horizontally along a single row. The row data is given in
1290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)// |src_data| and continues for the num_values() of the filter.
1390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)void ConvolveHorizontally_mips_dspr2(const unsigned char* src_data,
1490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                     const ConvolutionFilter1D& filter,
1590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                     unsigned char* out_row,
1690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                     bool has_alpha) {
1790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#if SIMD_MIPS_DSPR2
1890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  int row_to_filter = 0;
1990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  int num_values = filter.num_values();
2090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  if (has_alpha) {
2190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    for (int out_x = 0; out_x < num_values; out_x++) {
2290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      // Get the filter that determines the current output pixel.
2390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      int filter_offset, filter_length;
2490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      const ConvolutionFilter1D::Fixed* filter_values =
2590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        filter.FilterForValue(out_x, &filter_offset, &filter_length);
2690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      int filter_x = 0;
2790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
2890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      __asm__ __volatile__ (
2990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set push                                  \n"
3090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set noreorder                             \n"
3190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
3290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[filter_len], 3f          \n"
3390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " sll            $t0, %[filter_offset], 2   \n"
3490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            %[rtf], %[src_data], $t0   \n"
3590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac0                   \n"
3690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac1                   \n"
3790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac2                   \n"
3890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac3                   \n"
3990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "srl             $t7, %[filter_len], 2      \n"
4090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            $t7, 2f                    \n"
4190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " li             %[fx], 0                   \n"
4290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
4390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "11:                                        \n"
4490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t4, %[filter_val], %[fx]  \n"
4590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t5, %[fx], 1              \n"
4690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t6, 0($t4)                \n" // t6 = |cur[1]|cur[0]|
4790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t8, 4($t4)                \n" // t8 = |cur[3]|cur[2]|
4890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[rtf], $t5           \n"
4990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t0)                \n" // t1 = |a0|b0|g0|r0|
5090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t2, 4($t0)                \n" // t2 = |a1|b1|g1|r1|
5190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t3, 8($t0)                \n" // t3 = |a2|b2|g2|r2|
5290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t4, 12($t0)               \n" // t4 = |a3|b3|g3|r3|
5390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t0, $t2, $t1              \n" // t0 = |a1|g1|a0|g0|
5490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t5, $t2, $t1              \n" // t5 = |b1|r1|b0|r0|
5590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t1, $t0                   \n" // t1 = |0|a1|0|a0|
5690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t2, $t0                   \n" // t2 = |0|g1|0|g0|
5790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t0, $t5                   \n" // t0 = |0|b1|0|b0|
5890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t5                   \n" // t5 = |0|r1|0|r0|
5990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac0, $t1, $t6             \n" // ac0+(cur*a1)+(cur*a0)
6090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t0, $t6             \n" // ac1+(cur*b1)+(cur*b0)
6190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t6             \n" // ac2+(cur*g1)+(cur*g0)
6290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac3, $t5, $t6             \n" // ac3+(cur*r1)+(cur*r0)
6390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t0, $t4, $t3              \n" // t0 = |a3|g3|a2|g2|
6490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t5, $t4, $t3              \n" // t5 = |b3|r3|b2|r2|
6590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t1, $t0                   \n" // t1 = |0|a3|0|a2|
6690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t2, $t0                   \n" // t2 = |0|g3|0|g2|
6790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t0, $t5                   \n" // t0 = |0|b3|0|b2|
6890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t5                   \n" // t5 = |0|r3|0|r2|
6990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac0, $t1, $t8             \n" // ac0+(cur*a3)+(cur*a2)
7090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t0, $t8             \n" // ac1+(cur*b3)+(cur*b2)
7190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t8             \n" // ac2+(cur*g3)+(cur*g2)
7290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac3, $t5, $t8             \n" // ac3+(cur*r3)+(cur*r2)
7390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           $t7, $t7, -1               \n"
7490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            $t7, 11b                   \n"
7590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fx], %[fx], 8            \n"
7690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
7790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "2:                                         \n"
7890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            $t7, %[filter_len], 0x3    \n" // residual
7990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            $t7, 3f                    \n"
8090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " nop                                       \n"
8190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
8290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "21:                                        \n"
8390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t1, %[fx], 1              \n"
8490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t2, %[filter_val], %[fx]  \n"
8590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[rtf], $t1           \n"
8690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lh              $t6, 0($t2)                \n" // t6 = filter_val[fx]
8790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t1, 0($t0)                \n" // t1 = row[fx * 4 + 0]
8890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t2, 1($t0)                \n" // t2 = row[fx * 4 + 1]
8990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t3, 2($t0)                \n" // t3 = row[fx * 4 + 2]
9090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t4, 3($t0)                \n" // t4 = row[fx * 4 + 2]
9190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac3, $t6, $t1             \n"
9290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac2, $t6, $t2             \n"
9390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac1, $t6, $t3             \n"
9490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac0, $t6, $t4             \n"
9590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           $t7, $t7, -1               \n"
9690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            $t7, 21b                   \n"
9790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fx], %[fx], 2            \n"
9890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
9990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "3:                                         \n"
10090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t0, $ac0, %[kShiftBits]   \n" // a >> kShiftBits
10190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t1, $ac1, %[kShiftBits]   \n" // b >> kShiftBits
10290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t2, $ac2, %[kShiftBits]   \n" // g >> kShiftBits
10390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t3, $ac3, %[kShiftBits]   \n" // r >> kShiftBits
10490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t5, %[out_x], 2           \n"
10590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "repl.ph         $t6, 128                   \n" // t6 = | 128 | 128 |
10690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t5, %[out_row], $t5       \n"
10790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "append          $t2, $t3, 16               \n"
10890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "append          $t0, $t1, 16               \n"
10990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t1, $t0, $t6              \n"
11090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t1, $t1, 8                \n"
11190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t1, $t1, 8                \n"
11290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t1, $t1, $t6              \n"
11390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t3, $t2, $t6              \n"
11490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t3, $t3, 8                \n"
11590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t3, $t3, 8                \n"
11690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t3, $t3, $t6              \n"
11790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t0, $t1, $t3              \n"
11890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "usw             $t0, 0($t5)                \n"
11990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
12090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set pop                                   \n"
12190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [fx] "+r" (filter_x), [out_x] "+r" (out_x), [out_row] "+r" (out_row),
12290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [rtf] "+r" (row_to_filter)
12390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [filter_val] "r" (filter_values), [filter_len] "r" (filter_length),
12490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits),
12590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [filter_offset] "r" (filter_offset), [src_data] "r" (src_data)
12690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
12790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"
12890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      );
12990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    }
13090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  } else {
13190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    for (int out_x = 0; out_x < num_values; out_x++) {
13290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      // Get the filter that determines the current output pixel.
13390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      int filter_offset, filter_length;
13490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      const ConvolutionFilter1D::Fixed* filter_values =
13590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        filter.FilterForValue(out_x, &filter_offset, &filter_length);
13690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      int filter_x = 0;
13790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      __asm__ __volatile__ (
13890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set push                                  \n"
13990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set noreorder                             \n"
14090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
14190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[filter_len], 3f          \n"
14290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " sll            $t0, %[filter_offset], 2   \n"
14390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            %[rtf], %[src_data], $t0   \n"
14490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac1                   \n"
14590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac2                   \n"
14690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac3                   \n"
14790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "srl             $t7, %[filter_len], 2      \n"
14890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            $t7, 2f                    \n"
14990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " li             %[fx], 0                   \n"
15090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
15190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "11:                                        \n"
15290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t4, %[filter_val], %[fx]  \n"
15390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t5, %[fx], 1              \n"
15490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t6, 0($t4)                \n" // t6 = |cur[1]|cur[0]|
15590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t8, 4($t4)                \n" // t8 = |cur[3]|cur[2]|
15690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[rtf], $t5           \n"
15790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t0)                \n" // t1 = |a0|b0|g0|r0|
15890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t2, 4($t0)                \n" // t2 = |a1|b1|g1|r1|
15990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t3, 8($t0)                \n" // t3 = |a2|b2|g2|r2|
16090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t4, 12($t0)               \n" // t4 = |a3|b3|g3|r3|
16190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t0, $t2, $t1              \n" // t0 = |a1|g1|a0|g0|
16290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t5, $t2, $t1              \n" // t5 = |b1|r1|b0|r0|
16390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t2, $t0                   \n" // t2 = |0|g1|0|g0|
16490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t0, $t5                   \n" // t0 = |0|b1|0|b0|
16590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t5                   \n" // t5 = |0|r1|0|r0|
16690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t0, $t6             \n" // ac1+(cur*b1)+(cur*b0)
16790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t6             \n" // ac2+(cur*g1)+(cur*g0)
16890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac3, $t5, $t6             \n" // ac3+(cur*r1)+(cur*r0)
16990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t0, $t4, $t3              \n" // t0 = |a3|g3|a2|g2|
17090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t5, $t4, $t3              \n" // t5 = |b3|r3|b2|r2|
17190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t2, $t0                   \n" // t2 = |0|g3|0|g2|
17290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t0, $t5                   \n" // t0 = |0|b3|0|b2|
17390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t5                   \n" // t5 = |0|r3|0|r2|
17490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t0, $t8             \n" // ac1+(cur*b3)+(cur*b2)
17590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t8             \n" // ac2+(cur*g3)+(cur*g2)
17690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac3, $t5, $t8             \n" // ac3+(cur*r3)+(cur*r2)
17790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           $t7, $t7, -1               \n"
17890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            $t7, 11b                   \n"
17990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fx], %[fx], 8            \n"
18090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
18190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "2:                                         \n"
18290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            $t7, %[filter_len], 0x3    \n" // residual
18390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            $t7, 3f                    \n"
18490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " nop                                       \n"
18590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
18690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "21:                                        \n"
18790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t1, %[fx], 1              \n"
18890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t2, %[filter_val], %[fx]  \n"
18990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[rtf], $t1           \n"
19090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lh              $t6, 0($t2)                \n" // t6 = filter_val[fx]
19190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t1, 0($t0)                \n" // t1 = row[fx * 4 + 0]
19290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t2, 1($t0)                \n" // t2 = row[fx * 4 + 1]
19390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t3, 2($t0)                \n" // t3 = row[fx * 4 + 2]
19490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac3, $t6, $t1             \n"
19590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac2, $t6, $t2             \n"
19690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac1, $t6, $t3             \n"
19790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           $t7, $t7, -1               \n"
19890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            $t7, 21b                   \n"
19990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fx], %[fx], 2            \n"
20090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
20190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "3:                                         \n"
20290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t1, $ac1, %[kShiftBits]   \n" // b >> kShiftBits
20390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t2, $ac2, %[kShiftBits]   \n" // g >> kShiftBits
20490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t3, $ac3, %[kShiftBits]   \n" // r >> kShiftBits
20590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "repl.ph         $t6, 128                   \n" // t6 = | 128 | 128 |
20690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t8, %[out_x], 2           \n"
20790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t8, %[out_row], $t8       \n"
20890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "append          $t2, $t3, 16               \n"
20990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            $t1, 0xFFFF                \n"
21090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t5, $t1, $t6              \n"
21190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t5, $t5, 8                \n"
21290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t5, $t5, 8                \n"
21390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t5, $t5, $t6              \n"
21490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t4, $t2, $t6              \n"
21590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t4, $t4, 8                \n"
21690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t4, $t4, 8                \n"
21790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t4, $t4, $t6              \n"
21890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t0, $t5, $t4              \n"
21990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "usw             $t0, 0($t8)                \n"
22090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
22190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set pop                                   \n"
22290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [fx] "+r" (filter_x), [out_x] "+r" (out_x), [out_row] "+r" (out_row),
22390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [rtf] "+r" (row_to_filter)
22490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [filter_val] "r" (filter_values), [filter_len] "r" (filter_length),
22590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits),
22690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [filter_offset] "r" (filter_offset), [src_data] "r" (src_data)
22790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
22890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"
22990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      );
23090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    }
23190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  }
23290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#endif
23390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)}
23490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)void ConvolveVertically_mips_dspr2(const ConvolutionFilter1D::Fixed* filter_val,
23590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                   int filter_length,
23690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                   unsigned char* const* source_data_rows,
23790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                   int pixel_width,
23890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                   unsigned char* out_row,
23990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                   bool has_alpha) {
24090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#if SIMD_MIPS_DSPR2
24190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  // We go through each column in the output and do a vertical convolution,
24290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  // generating one output pixel each time.
24390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  int byte_offset;
24490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  int cnt;
24590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  int filter_y;
24690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  if (has_alpha) {
24790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    for (int out_x = 0; out_x < pixel_width; out_x++) {
24890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      __asm__ __volatile__ (
24990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set push                                   \n"
25090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set noreorder                              \n"
25190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
25290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[filter_len], 3f           \n"
25390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " sll            %[offset], %[out_x], 2      \n"
25490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac0                    \n"
25590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac1                    \n"
25690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac2                    \n"
25790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac3                    \n"
25890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "srl             %[cnt], %[filter_len], 2    \n"
25990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[cnt], 2f                  \n"
26090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " li             %[fy], 0                    \n"
26190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
26290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "11:                                         \n"
26390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t1, %[fy], 1               \n"
26490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[src_data_rows], $t1  \n"
26590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t0)                 \n"
26690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t2, 4($t0)                 \n"
26790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t3, 8($t0)                 \n"
26890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t4, 12($t0)                \n"
26990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t1, $t1, %[offset]         \n"
27090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t2, $t2, %[offset]         \n"
27190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t3, $t3, %[offset]         \n"
27290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t4, $t4, %[offset]         \n"
27390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t1)                 \n" // t1 = |a0|b0|g0|r0|
27490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t2, 0($t2)                 \n" // t2 = |a1|b1|g1|r1|
27590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t3, 0($t3)                 \n" // t3 = |a0|b0|g0|r0|
27690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t4, 0($t4)                 \n" // t4 = |a1|b1|g1|r1|
27790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t5, $t2, $t1               \n" // t5 = |a1|g1|a0|g0|
27890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t6, $t2, $t1               \n" // t6 = |b1|r1|b0|r0|
27990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t0, $t5                    \n" // t0 = |0|a1|0|a0|
28090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t1, $t5                    \n" // t1 = |0|g1|0|g0|
28190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t2, $t6                    \n" // t2 = |0|b1|0|b0|
28290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t6                    \n" // t5 = |0|r1|0|r0|
28390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t6, %[filter_val], %[fy]   \n"
28490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t7, 0($t6)                 \n" // t7 = |cur_1|cur_0|
28590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t6, 4($t6)                 \n" // t6 = |cur_3|cur_2|
28690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac0, $t5, $t7              \n" // (cur*r1)+(cur*r0)
28790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t1, $t7              \n" // (cur*g1)+(cur*g0)
28890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t7              \n" // (cur*b1)+(cur*b0)
28990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac3, $t0, $t7              \n" // (cur*a1)+(cur*a0)
29090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t5, $t4, $t3               \n" // t5 = |a3|g3|a2|g2|
29190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t7, $t4, $t3               \n" // t7 = |b3|r3|b2|r2|
29290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t0, $t5                    \n" // t0 = |0|a3|0|a2|
29390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t1, $t5                    \n" // t1 = |0|g3|0|g2|
29490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t2, $t7                    \n" // t2 = |0|b3|0|b2|
29590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t7                    \n" // t5 = |0|r3|0|r2|
29690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac0, $t5, $t6              \n" // (cur*r3)+(cur*r2)
29790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t1, $t6              \n" // (cur*g3)+(cur*g2)
29890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t6              \n" // (cur*b3)+(cur*b2)
29990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac3, $t0, $t6              \n" // (cur*a3)+(cur*a2)
30090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           %[cnt], %[cnt], -1          \n"
30190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            %[cnt], 11b                 \n"
30290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fy], %[fy], 8             \n"
30390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
30490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "2:                                          \n"
30590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            %[cnt], %[filter_len], 0x3  \n" // residual
30690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[cnt], 3f                  \n"
30790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " nop                                        \n"
30890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
30990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "21:                                         \n"
31090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[filter_val], %[fy]   \n"
31190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lh              $t4, 0($t0)                 \n" // t4=filter_val[fx]
31290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t1, %[fy], 1               \n"
31390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[src_data_rows], $t1  \n"
31490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t0)                 \n"
31590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, $t1, %[offset]         \n"
31690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t1, 0($t0)                 \n" // t1 = row[fx*4 + 0]
31790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t2, 1($t0)                 \n" // t2 = row[fx*4 + 1]
31890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t3, 2($t0)                 \n" // t3 = row[fx*4 + 2]
31990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t0, 3($t0)                 \n" // t4 = row[fx*4 + 2]
32090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac0, $t4, $t1              \n"
32190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac1, $t4, $t2              \n"
32290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac2, $t4, $t3              \n"
32390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac3, $t4, $t0              \n"
32490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           %[cnt], %[cnt], -1          \n"
32590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            %[cnt], 21b                 \n"
32690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fy], %[fy], 2             \n"
32790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
32890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "3:                                          \n"
32990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t3, $ac0, %[kShiftBits]    \n" // a >> kShiftBits
33090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t2, $ac1, %[kShiftBits]    \n" // b >> kShiftBits
33190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t1, $ac2, %[kShiftBits]    \n" // g >> kShiftBits
33290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t0, $ac3, %[kShiftBits]    \n" // r >> kShiftBits
33390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "repl.ph         $t4, 128                    \n" // t4 = | 128 | 128 |
33490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t5, %[out_row], %[offset]  \n"
33590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "append          $t2, $t3, 16                \n" // t2 = |0|g|0|r|
33690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "append          $t0, $t1, 16                \n" // t0 = |0|a|0|b|
33790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t1, $t0, $t4               \n"
33890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t1, $t1, 8                 \n"
33990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t1, $t1, 8                 \n"
34090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t1, $t1, $t4               \n" // Clamp(a)|Clamp(b)
34190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t2, $t2, $t4               \n"
34290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t2, $t2, 8                 \n"
34390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t2, $t2, 8                 \n"
34490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t2, $t2, $t4               \n" // Clamp(g)|Clamp(r)
34590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            $t3, $t1, 0xFF              \n" // t3 = ClampTo8(b)
34690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "cmp.lt.ph       $t3, $t2                    \n" // cmp b, g, r
34790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "pick.ph         $t0, $t2, $t3               \n"
34890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            $t3, $t0, 0xFF              \n"
34990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "srl             $t4, $t0, 16                \n"
35090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "cmp.lt.ph       $t3, $t4                    \n"
35190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "pick.ph         $t0, $t4, $t3               \n" // t0 = max_color_ch
35290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "srl             $t3, $t1, 16                \n" // t1 = ClampTo8(a)
35390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "cmp.lt.ph       $t3, $t0                    \n"
35490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "pick.ph         $t0, $t0, $t3               \n"
35590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ins             $t1, $t0, 16, 8             \n"
35690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t0, $t1, $t2               \n" // t0 = |a|b|g|r|
35790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "usw             $t0, 0($t5)                 \n"
35890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
35990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set pop                                    \n"
36090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [filter_val] "+r" (filter_val), [filter_len] "+r" (filter_length),
36190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [offset] "+r" (byte_offset), [fy] "+r" (filter_y), [cnt] "+r" (cnt),
36290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [out_x] "+r" (out_x), [pixel_width] "+r" (pixel_width)
36390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [src_data_rows] "r" (source_data_rows), [out_row] "r" (out_row),
36490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits)
36590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
36690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "t0", "t1", "t2", "t3", "t4", "t5", "t6","t7", "memory"
36790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      );
36890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    }
36990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  } else {
37090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    for (int out_x = 0; out_x < pixel_width; out_x++) {
37190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      __asm__ __volatile__ (
37290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set push                                   \n"
37390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set noreorder                              \n"
37490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
37590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[filter_len], 3f           \n"
37690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " sll            %[offset], %[out_x], 2      \n"
37790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac0                    \n"
37890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac1                    \n"
37990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "mtlo            $0, $ac2                    \n"
38090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "srl             %[cnt], %[filter_len], 2    \n"
38190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[cnt], 2f                  \n"
38290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " li             %[fy], 0                    \n"
38390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
38490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "11:                                         \n"
38590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t1, %[fy], 1               \n"
38690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[src_data_rows], $t1  \n"
38790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t0)                 \n"
38890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t2, 4($t0)                 \n"
38990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t3, 8($t0)                 \n"
39090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t4, 12($t0)                \n"
39190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t1, $t1, %[offset]         \n"
39290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t2, $t2, %[offset]         \n"
39390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t3, $t3, %[offset]         \n"
39490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t4, $t4, %[offset]         \n"
39590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t1)                 \n" // t1 = |a0|b0|g0|r0|
39690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t2, 0($t2)                 \n" // t2 = |a1|b1|g1|r1|
39790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t3, 0($t3)                 \n" // t3 = |a0|b0|g0|r0|
39890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t4, 0($t4)                 \n" // t4 = |a1|b1|g1|r1|
39990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t5, $t2, $t1               \n" // t5 = |a1|g1|a0|g0|
40090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t6, $t2, $t1               \n" // t6 = |b1|r1|b0|r0|
40190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t1, $t5                    \n" // t1 = |0|g1|0|g0|
40290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t2, $t6                    \n" // t2 = |0|b1|0|b0|
40390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t6                    \n" // t5 = |0|r1|0|r0|
40490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t6, %[filter_val], %[fy]   \n"
40590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t0, 0($t6)                 \n" // t0 = |cur_1|cur_0|
40690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ulw             $t6, 4($t6)                 \n" // t6 = |cur_1|cur_0|
40790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac0, $t5, $t0              \n" // (cur*r1)+(cur*r0)
40890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t1, $t0              \n" // (cur*g1)+(cur*g0)
40990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t0              \n" // (cur*b1)+(cur*b0)
41090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precrq.qb.ph    $t5, $t4, $t3               \n" // t5 = |a3|g3|a2|g2|
41190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t0, $t4, $t3               \n" // t0 = |b3|r3|b2|r2|
41290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t1, $t5                    \n" // t1 = |0|g3|0|g2|
41390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbla  $t2, $t0                    \n" // t2 = |0|b3|0|b2|
41490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "preceu.ph.qbra  $t5, $t0                    \n" // t5 = |0|r3|0|r2|
41590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac0, $t5, $t6              \n" // (cur*r1)+(cur*r0)
41690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac1, $t1, $t6              \n" // (cur*g1)+(cur*g0)
41790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "dpa.w.ph        $ac2, $t2, $t6              \n" // (cur*b1)+(cur*b0)
41890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           %[cnt], %[cnt], -1          \n"
41990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            %[cnt], 11b                 \n"
42090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fy], %[fy], 8             \n"
42190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
42290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "2:                                          \n"
42390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            %[cnt], %[filter_len], 0x3  \n" // residual
42490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "beqz            %[cnt], 3f                  \n"
42590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " nop                                        \n"
42690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
42790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "21:                                         \n"
42890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[filter_val], %[fy]   \n"
42990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lh              $t4, 0($t0)                 \n" // filter_val[fx]
43090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "sll             $t1, %[fy], 1               \n"
43190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, %[src_data_rows], $t1  \n"
43290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lw              $t1, 0($t0)                 \n"
43390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t0, $t1, %[offset]         \n"
43490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t1, 0($t0)                 \n" // t1 = row[fx*4 + 0]
43590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t2, 1($t0)                 \n" // t2 = row[fx*4 + 1]
43690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "lbu             $t3, 2($t0)                 \n" // t3 = row[fx*4 + 2]
43790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac0, $t4, $t1              \n"
43890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac1, $t4, $t2              \n"
43990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "maddu           $ac2, $t4, $t3              \n"
44090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addiu           %[cnt], %[cnt], -1          \n"
44190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "bgtz            %[cnt], 21b                 \n"
44290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        " addiu          %[fy], %[fy], 2             \n"
44390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
44490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "3:                                          \n"
44590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t3, $ac0, %[kShiftBits]    \n" // r >> kShiftBits
44690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t2, $ac1, %[kShiftBits]    \n" // g >> kShiftBits
44790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "extrv.w         $t1, $ac2, %[kShiftBits]    \n" // b >> kShiftBits
44890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "repl.ph         $t6, 128                    \n" // t6 = | 128 | 128 |
44990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu            $t5, %[out_row], %[offset]  \n"
45090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "append          $t2, $t3, 16                \n" // t2 = |0|g|0|r|
45190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "andi            $t1, $t1, 0xFFFF            \n"
45290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t1, $t1, $t6               \n"
45390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t1, $t1, 8                 \n"
45490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t1, $t1, 8                 \n"
45590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t1, $t1, $t6               \n" // Clamp(a)|Clamp(b)
45690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "subu.ph         $t2, $t2, $t6               \n"
45790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shll_s.ph       $t2, $t2, 8                 \n"
45890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "shra.ph         $t2, $t2, 8                 \n"
45990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "addu.ph         $t2, $t2, $t6               \n" // Clamp(g)|Clamp(r)
46090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "li              $t0, 0xFF                   \n"
46190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "ins             $t1, $t0, 16, 8             \n"
46290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "precr.qb.ph     $t0, $t1, $t2               \n" // t0 = |a|b|g|r|
46390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "usw             $t0, 0($t5)                 \n"
46490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
46590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        ".set pop                                    \n"
46690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [filter_val] "+r" (filter_val), [filter_len] "+r" (filter_length),
46790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [offset] "+r" (byte_offset), [fy] "+r" (filter_y), [cnt] "+r" (cnt),
46890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [out_x] "+r" (out_x), [pixel_width] "+r" (pixel_width)
46990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : [src_data_rows] "r" (source_data_rows), [out_row] "r" (out_row),
47090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits)
47190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
47290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)        "t0", "t1", "t2", "t3", "t4", "t5", "t6", "memory"
47390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      );
47490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    }
47590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  }
47690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#endif
47790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)}
47890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)} // namespace skia
479