/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_config.h"
12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h"
13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/inv_txfm_dspr2.h"
14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/txfm_common.h"
15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#if HAVE_DSPR2
17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid idct16_rows_dspr2(const int16_t *input, int16_t *output,
18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                       uint32_t no_rows) {
19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step1_10, step1_11, step1_12, step1_13;
22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step2_0, step2_1, step2_2, step2_3;
23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step2_8, step2_9, step2_10, step2_11;
24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step2_12, step2_13, step2_14, step2_15;
25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int load1, load2, load3, load4, load5, load6, load7, load8;
26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int result1, result2, result3, result4;
27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  const int const_2_power_13 = 8192;
28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (i = no_rows; i--; ) {
30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    /* prefetch row */
31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    prefetch_load((const uint8_t *)(input + 16));
32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load1],              0(%[input])                    \n\t"
35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load2],             16(%[input])                    \n\t"
36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load3],              8(%[input])                    \n\t"
37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load4],             24(%[input])                    \n\t"
38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[result1],           %[load1],       %[load2]        \n\t"
44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[result2],           %[load1],       %[load2]        \n\t"
45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[result1],     %[cospi_16_64]  \n\t"
46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[result2],     %[cospi_16_64]  \n\t"
47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_0],           $ac1,           31              \n\t"
48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_1],           $ac2,           31              \n\t"
49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load3],       %[cospi_24_64]  \n\t"
53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load4],       %[cospi_8_64]   \n\t"
54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_2],           $ac3,           31              \n\t"
55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load3],       %[cospi_8_64]   \n\t"
59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load4],       %[cospi_24_64]  \n\t"
60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_3],           $ac1,           31              \n\t"
61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_0],           %[step2_0],     %[step2_3]      \n\t"
63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_1],           %[step2_1],     %[step2_2]      \n\t"
64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[step1_2],           %[step2_1],     %[step2_2]      \n\t"
65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[step1_3],           %[step2_0],     %[step2_3]      \n\t"
66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load1] "=&r" (load1), [load2] "=&r" (load2),
68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load3] "=&r" (load3), [load4] "=&r" (load4),
69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_0] "=&r" (step2_0), [step2_1] "=&r" (step2_1),
71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_2] "=&r" (step2_2), [step2_3] "=&r" (step2_3),
72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_0] "=r" (step1_0), [step1_1] "=r" (step1_1),
73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_2] "=r" (step1_2), [step1_3] "=r" (step1_3)
74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64),
76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_16_64] "r" (cospi_16_64)
77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load5],             2(%[input])                     \n\t"
81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load6],             30(%[input])                    \n\t"
82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load7],             18(%[input])                    \n\t"
83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load8],             14(%[input])                    \n\t"
84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load5],       %[cospi_30_64]  \n\t"
91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load6],       %[cospi_2_64]   \n\t"
92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result1],           $ac1,           31              \n\t"
93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load7],       %[cospi_14_64]  \n\t"
95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load8],       %[cospi_18_64]  \n\t"
96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result2],           $ac3,           31              \n\t"
97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load7],       %[cospi_18_64]  \n\t"
104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load8],       %[cospi_14_64]  \n\t"
105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result3],           $ac1,           31              \n\t"
106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load5],       %[cospi_2_64]   \n\t"
108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load6],       %[cospi_30_64]  \n\t"
109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result4],           $ac2,           31              \n\t"
110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[result1],     %[result2]      \n\t"
112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[result4],     %[result3]      \n\t"
113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load6],       %[cospi_24_64]  \n\t"
120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load5],       %[cospi_8_64]   \n\t"
121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load5],       %[cospi_24_64]  \n\t"
122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load6],       %[cospi_8_64]   \n\t"
123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_9],           $ac1,           31              \n\t"
125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_14],          $ac3,           31              \n\t"
126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_8],           %[result1],     %[result2]      \n\t"
127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_15],          %[result4],     %[result3]      \n\t"
128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6),
130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load7] "=&r" (load7), [load8] "=&r" (load8),
131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result3] "=&r" (result3), [result4] "=&r" (result4),
133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_8] "=r" (step2_8), [step2_15] "=r" (step2_15),
134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_9] "=r" (step2_9), [step2_14] "=r" (step2_14)
135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_30_64] "r" (cospi_30_64), [cospi_2_64] "r" (cospi_2_64),
137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_14_64] "r" (cospi_14_64), [cospi_18_64] "r" (cospi_18_64),
138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load1],             10(%[input])                    \n\t"
143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load2],             22(%[input])                    \n\t"
144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load3],             26(%[input])                    \n\t"
145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load4],             6(%[input])                     \n\t"
146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load1],       %[cospi_22_64]  \n\t"
153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load2],       %[cospi_10_64]  \n\t"
154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result1],           $ac1,           31              \n\t"
155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load3],       %[cospi_6_64]   \n\t"
157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load4],       %[cospi_26_64]  \n\t"
158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result2],           $ac3,           31              \n\t"
159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load1],       %[cospi_10_64]  \n\t"
166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load2],       %[cospi_22_64]  \n\t"
167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result3],           $ac1,           31              \n\t"
168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load3],       %[cospi_26_64]  \n\t"
170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load4],       %[cospi_6_64]   \n\t"
171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result4],           $ac2,           31              \n\t"
172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load1],             %[result2],     %[result1]      \n\t"
179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load2],             %[result4],     %[result3]      \n\t"
180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load1],       %[cospi_24_64]  \n\t"
182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load2],       %[cospi_8_64]   \n\t"
183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load2],       %[cospi_24_64]  \n\t"
184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load1],       %[cospi_8_64]   \n\t"
185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_10],          $ac1,           31              \n\t"
187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_13],          $ac3,           31              \n\t"
188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_11],          %[result1],     %[result2]      \n\t"
189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_12],          %[result4],     %[result3]      \n\t"
190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load1] "=&r" (load1), [load2] "=&r" (load2),
192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load3] "=&r" (load3), [load4] "=&r" (load4),
193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result3] "=&r" (result3), [result4] "=&r" (result4),
195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_10] "=r" (step2_10), [step2_11] "=r" (step2_11),
196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_12] "=r" (step2_12), [step2_13] "=r" (step2_13)
197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_22_64] "r" (cospi_22_64), [cospi_10_64] "r" (cospi_10_64),
199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_6_64] "r" (cospi_6_64), [cospi_26_64] "r" (cospi_26_64),
200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
202da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load5],             4(%[input])                     \n\t"
205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load6],             28(%[input])                    \n\t"
206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load7],             20(%[input])                    \n\t"
207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load8],             12(%[input])                    \n\t"
208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load5],       %[cospi_28_64]  \n\t"
215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load6],       %[cospi_4_64]   \n\t"
216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result1],           $ac1,           31              \n\t"
217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load7],       %[cospi_12_64]  \n\t"
219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load8],       %[cospi_20_64]  \n\t"
220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result2],           $ac3,           31              \n\t"
221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load7],       %[cospi_20_64]  \n\t"
228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load8],       %[cospi_12_64]  \n\t"
229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result3],           $ac1,           31              \n\t"
230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load5],       %[cospi_4_64]   \n\t"
232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load6],       %[cospi_28_64]  \n\t"
233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result4],           $ac2,           31              \n\t"
234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[result4],     %[result3]      \n\t"
241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[result1]      \n\t"
242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[result2]      \n\t"
243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[result1],     %[result2]      \n\t"
245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[result3]      \n\t"
246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[result4]      \n\t"
247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load5],       %[cospi_16_64]  \n\t"
249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load6],       %[cospi_16_64]  \n\t"
250da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_5],           $ac1,           31              \n\t"
252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_6],           $ac3,           31              \n\t"
253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_4],           %[result1],     %[result2]      \n\t"
254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_7],           %[result4],     %[result3]      \n\t"
255da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
256da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6),
257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load7] "=&r" (load7), [load8] "=&r" (load8),
258da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
259da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result3] "=&r" (result3), [result4] "=&r" (result4),
260da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_4] "=r" (step1_4), [step1_5] "=r" (step1_5),
261da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_6] "=r" (step1_6), [step1_7] "=r" (step1_7)
262da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
263da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_20_64] "r" (cospi_20_64), [cospi_12_64] "r" (cospi_12_64),
264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_4_64] "r" (cospi_4_64), [cospi_28_64] "r" (cospi_28_64),
265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_16_64] "r" (cospi_16_64)
266da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
269da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac0                            \n\t"
270da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac0                            \n\t"
271da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
272da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
273da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
274da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step2_14],    %[step2_13]     \n\t"
275da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_9]      \n\t"
276da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_10]     \n\t"
277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac0,                 %[load5],       %[cospi_16_64]  \n\t"
279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step2_14],    %[step2_13]     \n\t"
281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_10]     \n\t"
282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_9]      \n\t"
283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load6],       %[cospi_16_64]  \n\t"
285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
287da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
288da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
291da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step2_15],    %[step2_12]     \n\t"
292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_8]      \n\t"
293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_11]     \n\t"
294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
295da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load5],       %[cospi_16_64]  \n\t"
296da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step2_15],    %[step2_12]     \n\t"
298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_11]     \n\t"
299da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_8]      \n\t"
300da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
301da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load6],       %[cospi_16_64]  \n\t"
302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_10],          $ac0,           31              \n\t"
304da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_13],          $ac1,           31              \n\t"
305da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_11],          $ac2,           31              \n\t"
306da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_12],          $ac3,           31              \n\t"
307da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6),
309da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_10] "=r" (step1_10), [step1_11] "=r" (step1_11),
310da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_12] "=r" (step1_12), [step1_13] "=r" (step1_13)
311da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13),
312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_14] "r" (step2_14), [step2_13] "r" (step2_13),
313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_9] "r" (step2_9), [step2_10] "r" (step2_10),
314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_15] "r" (step2_15), [step2_12] "r" (step2_12),
315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_8] "r" (step2_8), [step2_11] "r" (step2_11),
316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_16_64] "r" (cospi_16_64)
317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[step1_0],     %[step1_7]      \n\t"
321da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_12]     \n\t"
322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_15]     \n\t"
323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[step1_1],     %[step1_6]      \n\t"
324da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_13]     \n\t"
325da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_14]     \n\t"
326da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             0(%[output])                    \n\t"
327da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             32(%[output])                   \n\t"
328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step1_1],     %[step1_6]      \n\t"
329da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_9]      \n\t"
330da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_10]     \n\t"
331da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step1_0],     %[step1_7]      \n\t"
332da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_8]      \n\t"
333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_11]     \n\t"
334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             192(%[output])                  \n\t"
335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             224(%[output])                  \n\t"
336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step1_0],     %[step1_7]      \n\t"
337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_8]      \n\t"
338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_11]     \n\t"
339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step1_1],     %[step1_6]      \n\t"
340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_9]      \n\t"
341da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_10]     \n\t"
342da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             256(%[output])                  \n\t"
343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             288(%[output])                  \n\t"
344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[step1_1],     %[step1_6]      \n\t"
345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_13]     \n\t"
346da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_14]     \n\t"
347da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[step1_0],     %[step1_7]      \n\t"
348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_12]     \n\t"
349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_15]     \n\t"
350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             448(%[output])                  \n\t"
351da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             480(%[output])                  \n\t"
352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
353da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6)
354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [output] "r" (output),
355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_0] "r" (step1_0), [step1_1] "r" (step1_1),
356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_6] "r" (step1_6), [step1_7] "r" (step1_7),
357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_8] "r" (step2_8), [step2_9] "r" (step2_9),
358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_10] "r" (step2_10), [step2_11] "r" (step2_11),
359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_12] "r" (step2_12), [step2_13] "r" (step2_13),
360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_14] "r" (step2_14), [step2_15] "r" (step2_15)
361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[step1_2],     %[step1_5]      \n\t"
365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step1_13]     \n\t"
366da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[step1_3],     %[step1_4]      \n\t"
367da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step1_12]     \n\t"
368da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             64(%[output])                   \n\t"
369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             96(%[output])                   \n\t"
370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step1_3],     %[step1_4]      \n\t"
371da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step1_11]     \n\t"
372da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step1_2],     %[step1_5]      \n\t"
373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step1_10]     \n\t"
374da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             128(%[output])                  \n\t"
375da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             160(%[output])                  \n\t"
376da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step1_2],     %[step1_5]      \n\t"
377da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step1_10]     \n\t"
378da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step1_3],     %[step1_4]      \n\t"
379da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step1_11]     \n\t"
380da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             320(%[output])                  \n\t"
381da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             352(%[output])                  \n\t"
382da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[step1_3],     %[step1_4]      \n\t"
383da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step1_12]     \n\t"
384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[step1_2],     %[step1_5]      \n\t"
385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step1_13]     \n\t"
386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load5],             384(%[output])                  \n\t"
387da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sh       %[load6],             416(%[output])                  \n\t"
388da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6)
390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [output] "r" (output),
391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_2] "r" (step1_2), [step1_3] "r" (step1_3),
392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_4] "r" (step1_4), [step1_5] "r" (step1_5),
393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_10] "r" (step1_10), [step1_11] "r" (step1_11),
394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_12] "r" (step1_12), [step1_13] "r" (step1_13)
395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    input += 16;
398da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    output += 1;
399da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
400da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
401da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
403da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                               int dest_stride) {
404da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
405da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
406da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step1_8, step1_9, step1_10, step1_11;
407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step1_12, step1_13, step1_14, step1_15;
408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step2_0, step2_1, step2_2, step2_3;
409da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step2_8, step2_9, step2_10, step2_11;
410da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int step2_12, step2_13, step2_14, step2_15;
411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int load1, load2, load3, load4, load5, load6, load7, load8;
412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int result1, result2, result3, result4;
413da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  const int const_2_power_13 = 8192;
414da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint8_t *dest_pix;
415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint8_t *cm = vpx_ff_cropTbl;
416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  /* prefetch vpx_ff_cropTbl */
418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl);
419da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl +  32);
420da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl +  64);
421da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl +  96);
422da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl + 128);
423da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl + 160);
424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl + 192);
425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  prefetch_load(vpx_ff_cropTbl + 224);
426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (i = 0; i < 16; ++i) {
428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    dest_pix = (dest + i);
429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
430da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load1],              0(%[input])                    \n\t"
431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load2],             16(%[input])                    \n\t"
432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load3],              8(%[input])                    \n\t"
433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load4],             24(%[input])                    \n\t"
434da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
435da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
436da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
438da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
439da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[result1],           %[load1],       %[load2]        \n\t"
440da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[result2],           %[load1],       %[load2]        \n\t"
441da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[result1],     %[cospi_16_64]  \n\t"
442da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[result2],     %[cospi_16_64]  \n\t"
443da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_0],           $ac1,           31              \n\t"
444da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_1],           $ac2,           31              \n\t"
445da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
446da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
447da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
448da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load3],       %[cospi_24_64]  \n\t"
449da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load4],       %[cospi_8_64]   \n\t"
450da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_2],           $ac3,           31              \n\t"
451da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
453da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
454da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load3],       %[cospi_8_64]   \n\t"
455da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load4],       %[cospi_24_64]  \n\t"
456da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_3],           $ac1,           31              \n\t"
457da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
458da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_0],           %[step2_0],     %[step2_3]      \n\t"
459da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_1],           %[step2_1],     %[step2_2]      \n\t"
460da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[step1_2],           %[step2_1],     %[step2_2]      \n\t"
461da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[step1_3],           %[step2_0],     %[step2_3]      \n\t"
462da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
463da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load1] "=&r" (load1), [load2] "=&r" (load2),
464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load3] "=&r" (load3), [load4] "=&r" (load4),
465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
466da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_0] "=&r" (step2_0), [step2_1] "=&r" (step2_1),
467da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_2] "=&r" (step2_2), [step2_3] "=&r" (step2_3),
468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_0] "=r" (step1_0), [step1_1] "=r" (step1_1),
469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_2] "=r" (step1_2), [step1_3] "=r" (step1_3)
470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
471da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64),
472da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_16_64] "r" (cospi_16_64)
473da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
475da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
476da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load5],             2(%[input])                     \n\t"
477da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load6],             30(%[input])                    \n\t"
478da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load7],             18(%[input])                    \n\t"
479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load8],             14(%[input])                    \n\t"
480da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
481da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
482da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
483da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
484da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
485da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
486da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load5],       %[cospi_30_64]  \n\t"
487da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load6],       %[cospi_2_64]   \n\t"
488da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result1],           $ac1,           31              \n\t"
489da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
490da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load7],       %[cospi_14_64]  \n\t"
491da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load8],       %[cospi_18_64]  \n\t"
492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result2],           $ac3,           31              \n\t"
493da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
494da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
495da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
496da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
497da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
498da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
499da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load7],       %[cospi_18_64]  \n\t"
500da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load8],       %[cospi_14_64]  \n\t"
501da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result3],           $ac1,           31              \n\t"
502da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
503da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load5],        %[cospi_2_64]  \n\t"
504da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load6],        %[cospi_30_64] \n\t"
505da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result4],           $ac2,            31             \n\t"
506da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
507da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[result1],     %[result2]      \n\t"
508da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[result4],     %[result3]      \n\t"
509da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
510da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
511da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
512da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
513da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
514da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
515da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load6],       %[cospi_24_64]  \n\t"
516da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load5],       %[cospi_8_64]   \n\t"
517da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load5],       %[cospi_24_64]  \n\t"
518da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load6],       %[cospi_8_64]   \n\t"
519da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
520da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_9],           $ac1,           31              \n\t"
521da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_14],          $ac3,           31              \n\t"
522da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_8],           %[result1],     %[result2]      \n\t"
523da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_15],          %[result4],     %[result3]      \n\t"
524da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
525da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6),
526da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load7] "=&r" (load7), [load8] "=&r" (load8),
527da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
528da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result3] "=&r" (result3), [result4] "=&r" (result4),
529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_8] "=r" (step2_8), [step2_15] "=r" (step2_15),
530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_9] "=r" (step2_9), [step2_14] "=r" (step2_14)
531da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
532da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_30_64] "r" (cospi_30_64), [cospi_2_64] "r" (cospi_2_64),
533da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_14_64] "r" (cospi_14_64), [cospi_18_64] "r" (cospi_18_64),
534da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
535da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
536da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
537da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
538da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load1],             10(%[input])                    \n\t"
539da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load2],             22(%[input])                    \n\t"
540da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load3],             26(%[input])                    \n\t"
541da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load4],             6(%[input])                     \n\t"
542da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
543da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
544da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
545da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
546da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
547da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
548da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load1],    %[cospi_22_64]     \n\t"
549da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load2],    %[cospi_10_64]     \n\t"
550da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result1],           $ac1,        31                 \n\t"
551da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
552da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load3],    %[cospi_6_64]      \n\t"
553da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load4],    %[cospi_26_64]     \n\t"
554da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result2],           $ac3,        31                 \n\t"
555da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
556da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
559da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
560da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
561da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load1],    %[cospi_10_64]     \n\t"
562da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load2],    %[cospi_22_64]     \n\t"
563da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result3],           $ac1,        31                 \n\t"
564da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
565da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load3],    %[cospi_26_64]     \n\t"
566da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load4],    %[cospi_6_64]      \n\t"
567da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result4],           $ac2,        31                 \n\t"
568da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
569da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
570da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
571da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
572da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
573da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
574da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load1],             %[result2],     %[result1]      \n\t"
575da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load2],             %[result4],     %[result3]      \n\t"
576da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
577da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load1],       %[cospi_24_64]  \n\t"
578da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load2],       %[cospi_8_64]   \n\t"
579da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load2],       %[cospi_24_64]  \n\t"
580da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load1],       %[cospi_8_64]   \n\t"
581da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
582da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_10],          $ac1,           31              \n\t"
583da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step2_13],          $ac3,           31              \n\t"
584da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_11],          %[result1],     %[result2]      \n\t"
585da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step2_12],          %[result4],     %[result3]      \n\t"
586da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
587da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load1] "=&r" (load1), [load2] "=&r" (load2),
588da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load3] "=&r" (load3), [load4] "=&r" (load4),
589da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
590da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result3] "=&r" (result3), [result4] "=&r" (result4),
591da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_10] "=r" (step2_10), [step2_11] "=r" (step2_11),
592da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_12] "=r" (step2_12), [step2_13] "=r" (step2_13)
593da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
594da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_22_64] "r" (cospi_22_64), [cospi_10_64] "r" (cospi_10_64),
595da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_6_64] "r" (cospi_6_64), [cospi_26_64] "r" (cospi_26_64),
596da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
597da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
598da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
599da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
600da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load5],             4(%[input])                   \n\t"
601da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load6],             28(%[input])                  \n\t"
602da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load7],             20(%[input])                  \n\t"
603da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lh       %[load8],             12(%[input])                  \n\t"
604da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
605da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                          \n\t"
606da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                          \n\t"
607da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                          \n\t"
608da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                          \n\t"
609da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
610da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load5],    %[cospi_28_64]   \n\t"
611da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac1,                 %[load6],    %[cospi_4_64]    \n\t"
612da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result1],           $ac1,        31               \n\t"
613da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
614da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load7],    %[cospi_12_64]   \n\t"
615da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "msub     $ac3,                 %[load8],    %[cospi_20_64]   \n\t"
616da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result2],           $ac3,        31               \n\t"
617da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
618da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                          \n\t"
619da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                          \n\t"
620da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                          \n\t"
621da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                          \n\t"
622da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
623da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load7],    %[cospi_20_64]   \n\t"
624da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load8],    %[cospi_12_64]   \n\t"
625da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result3],           $ac1,        31               \n\t"
626da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
627da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load5],    %[cospi_4_64]    \n\t"
628da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load6],    %[cospi_28_64]   \n\t"
629da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[result4],           $ac2,        31               \n\t"
630da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
631da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
632da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
633da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
634da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
635da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
636da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[result4],     %[result3]      \n\t"
637da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[result1]      \n\t"
638da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[result2]      \n\t"
639da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
640da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[result1],     %[result2]      \n\t"
641da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[result3]      \n\t"
642da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[result4]      \n\t"
643da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
644da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load5],       %[cospi_16_64]  \n\t"
645da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load6],       %[cospi_16_64]  \n\t"
646da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
647da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_5],           $ac1,           31              \n\t"
648da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_6],           $ac3,           31              \n\t"
649da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
650da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_4],           %[result1],     %[result2]      \n\t"
651da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[step1_7],           %[result4],     %[result3]      \n\t"
652da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
653da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6),
654da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load7] "=&r" (load7), [load8] "=&r" (load8),
655da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result1] "=&r" (result1), [result2] "=&r" (result2),
656da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [result3] "=&r" (result3), [result4] "=&r" (result4),
657da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_4] "=r" (step1_4), [step1_5] "=r" (step1_5),
658da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_6] "=r" (step1_6), [step1_7] "=r" (step1_7)
659da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
660da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_20_64] "r" (cospi_20_64), [cospi_12_64] "r" (cospi_12_64),
661da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_4_64] "r" (cospi_4_64), [cospi_28_64] "r" (cospi_28_64),
662da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_16_64] "r" (cospi_16_64)
663da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
664da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
665da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
666da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac0                            \n\t"
667da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac0                            \n\t"
668da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac1                            \n\t"
669da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac1                            \n\t"
670da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
671da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step2_14],    %[step2_13]     \n\t"
672da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_9]      \n\t"
673da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_10]     \n\t"
674da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
675da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac0,                 %[load5],       %[cospi_16_64]  \n\t"
676da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
677da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step2_14],    %[step2_13]     \n\t"
678da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_10]     \n\t"
679da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_9]      \n\t"
680da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
681da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac1,                 %[load6],       %[cospi_16_64]  \n\t"
682da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
683da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac2                            \n\t"
684da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac2                            \n\t"
685da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mtlo     %[const_2_power_13],  $ac3                            \n\t"
686da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "mthi     $zero,                $ac3                            \n\t"
687da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
688da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[step2_15],    %[step2_12]     \n\t"
689da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],             %[load5],       %[step2_8]      \n\t"
690da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],             %[load5],       %[step2_11]     \n\t"
691da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
692da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac2,                 %[load5],       %[cospi_16_64]  \n\t"
693da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
694da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[step2_15],    %[step2_12]     \n\t"
695da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],             %[load6],       %[step2_11]     \n\t"
696da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],             %[load6],       %[step2_8]      \n\t"
697da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
698da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "madd     $ac3,                 %[load6],       %[cospi_16_64]  \n\t"
699da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
700da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_10],          $ac0,           31              \n\t"
701da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_13],          $ac1,           31              \n\t"
702da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_11],          $ac2,           31              \n\t"
703da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "extp     %[step1_12],          $ac3,           31              \n\t"
704da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
705da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6),
706da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_10] "=r" (step1_10), [step1_11] "=r" (step1_11),
707da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_12] "=r" (step1_12), [step1_13] "=r" (step1_13)
708da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [const_2_power_13] "r" (const_2_power_13),
709da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_14] "r" (step2_14), [step2_13] "r" (step2_13),
710da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_9] "r" (step2_9), [step2_10] "r" (step2_10),
711da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_15] "r" (step2_15), [step2_12] "r" (step2_12),
712da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step2_8] "r" (step2_8), [step2_11] "r" (step2_11),
713da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [cospi_16_64] "r" (cospi_16_64)
714da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
715da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
716da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    step1_8 = step2_8 + step2_11;
717da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    step1_9 = step2_9 + step2_10;
718da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    step1_14 = step2_13 + step2_14;
719da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    step1_15 = step2_12 + step2_15;
720da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
721da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
722da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
723da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[step1_0],         %[step1_7]      \n\t"
724da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[load5],           %[step1_15]     \n\t"
725da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
726da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
727da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
728da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
729da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[step1_1],         %[step1_6]      \n\t"
730da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[load6],           %[step1_14]     \n\t"
731da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
732da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
733da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
734da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
735da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
736da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
737da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
738da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
739da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
740da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
741da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
742da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[step1_2],         %[step1_5]      \n\t"
743da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[load5],           %[step1_13]     \n\t"
744da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
745da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
746da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
747da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
748da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[step1_3],         %[step1_4]      \n\t"
749da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[load6],           %[step1_12]     \n\t"
750da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
751da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
752da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
753da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
754da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
755da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
756da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
757da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
758da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
759da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
760da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
761da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[step1_3],         %[step1_4]      \n\t"
762da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[load5],           %[step1_11]     \n\t"
763da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
764da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
765da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
766da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
767da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[step1_2],         %[step1_5]      \n\t"
768da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[load6],           %[step1_10]     \n\t"
769da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
770da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
771da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
772da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
773da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
774da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
775da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
776da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
777da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
778da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
779da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[step1_1],         %[step1_6]      \n\t"
780da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
781da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[load5],           %[step1_9]      \n\t"
782da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
783da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
784da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
785da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
786da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[step1_0],         %[step1_7]      \n\t"
787da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[load6],           %[step1_8]      \n\t"
788da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
789da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
790da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
791da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
792da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
793da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
794da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
795da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
796da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
797da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
798da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
799da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[step1_0],         %[step1_7]      \n\t"
800da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[load5],           %[step1_8]      \n\t"
801da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
802da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
803da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
804da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
805da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[step1_1],         %[step1_6]      \n\t"
806da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[load6],           %[step1_9]      \n\t"
807da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
808da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
809da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
810da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
811da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
812da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
813da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
814da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
815da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
816da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
817da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
818da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[step1_2],         %[step1_5]      \n\t"
819da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[load5],           %[step1_10]     \n\t"
820da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
821da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
822da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
823da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
824da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[step1_3],         %[step1_4]      \n\t"
825da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[load6],           %[step1_11]     \n\t"
826da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
827da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
828da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
829da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
830da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
831da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
832da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
833da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
834da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
835da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
836da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
837da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[step1_3],         %[step1_4]      \n\t"
838da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[load5],           %[step1_12]     \n\t"
839da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
840da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
841da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
842da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
843da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[step1_2],         %[step1_5]      \n\t"
844da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[load6],           %[step1_13]     \n\t"
845da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
846da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
847da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
848da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
849da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
850da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
851da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
852da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
853da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
854da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
855da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load7],         0(%[dest_pix])                      \n\t"
856da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load5],         %[step1_1],         %[step1_6]      \n\t"
857da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load5],         %[load5],           %[step1_14]     \n\t"
858da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load5],         %[load5],           32              \n\t"
859da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load5],         %[load5],           6               \n\t"
860da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load7],         %[load7],           %[load5]        \n\t"
861da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load5],         %[load7](%[cm])                     \n\t"
862da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load6],         %[step1_0],         %[step1_7]      \n\t"
863da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sub      %[load6],         %[load6],           %[step1_15]     \n\t"
864da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load5],         0(%[dest_pix])                      \n\t"
865da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addu     %[dest_pix],      %[dest_pix],        %[dest_stride]  \n\t"
866da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbu      %[load8],         0(%[dest_pix])                      \n\t"
867da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "addi     %[load6],         %[load6],           32              \n\t"
868da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sra      %[load6],         %[load6],           6               \n\t"
869da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "add      %[load8],         %[load8],           %[load6]        \n\t"
870da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "lbux     %[load6],         %[load8](%[cm])                     \n\t"
871da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sb       %[load6],         0(%[dest_pix])                      \n\t"
872da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
873da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [load5] "=&r" (load5), [load6] "=&r" (load6), [load7] "=&r" (load7),
874da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [load8] "=&r" (load8), [dest_pix] "+r" (dest_pix)
875da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
876da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_0] "r" (step1_0), [step1_1] "r" (step1_1),
877da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_2] "r" (step1_2), [step1_3] "r" (step1_3),
878da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_4] "r" (step1_4), [step1_5] "r" (step1_5),
879da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_6] "r" (step1_6), [step1_7] "r" (step1_7),
880da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_8] "r" (step1_8), [step1_9] "r" (step1_9),
881da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_10] "r" (step1_10), [step1_11] "r" (step1_11),
882da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_12] "r" (step1_12), [step1_13] "r" (step1_13),
883da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          [step1_14] "r" (step1_14), [step1_15] "r" (step1_15)
884da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
885da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
886da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    input += 16;
887da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
888da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
889da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/* 16x16 inverse transform + add, full-coefficient variant.
 *
 * Runs the row pass over all 16 rows of `input` into a local 16x16 int16_t
 * buffer, then hands that buffer to the column pass, which adds the result
 * into `dest`.
 *
 *   input       - coefficient block, passed unchanged to idct16_rows_dspr2.
 *   dest        - destination pixels, updated by idct16_cols_add_blk_dspr2.
 *   dest_stride - forwarded unchanged to the column pass.
 */
890da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
891da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                                 int dest_stride) {
  // Intermediate buffer shared by the row and column passes.
892da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  DECLARE_ALIGNED(32, int16_t,  out[16 * 16]);
893da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint32_t pos = 45;
894da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
895da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  /* bit position for extract from acc: write pos (45) with wrdsp, mask 1 */
  // NOTE(review): presumably consumed by the accumulator-extract
  // instructions inside the row/column helpers - confirm against them.
896da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  __asm__ __volatile__ (
897da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    "wrdsp    %[pos],    1    \n\t"
898da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    :
899da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    : [pos] "r" (pos)
900da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  );
901da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
902da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // First transform rows (all 16 of them) into `out`
903da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  idct16_rows_dspr2(input, out, 16);
904da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
905da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // Then transform columns and add to dest
906da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  idct16_cols_add_blk_dspr2(out, dest, dest_stride);
907da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
908da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/* 16x16 inverse transform + add, reduced-coefficient variant.
 *
 * Only the first 4 rows of `input` go through the row pass (see the comment
 * at the call: the non-zero coefficients are expected in the upper-left 4x4
 * area). The rest of the intermediate buffer is zero-filled by hand before
 * the column pass adds the result into `dest`.
 *
 *   input       - coefficient block, passed unchanged to idct16_rows_dspr2.
 *   dest        - destination pixels, updated by idct16_cols_add_blk_dspr2.
 *   dest_stride - forwarded unchanged to the column pass.
 */
909da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
910da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                                int dest_stride) {
  // Intermediate buffer shared by the row and column passes.
911da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  DECLARE_ALIGNED(32, int16_t,  out[16 * 16]);
912da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int16_t *outptr = out;
913da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint32_t i;
914da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint32_t pos = 45;
915da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
916da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  /* bit position for extract from acc: write pos (45) with wrdsp, mask 1 */
917da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  __asm__ __volatile__ (
918da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    "wrdsp    %[pos],    1    \n\t"
919da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    :
920da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    : [pos] "r" (pos)
921da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  );
922da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
923da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // First transform rows. Since all non-zero dct coefficients are in
924da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // upper-left 4x4 area, we only need to calculate first 4 rows here.
925da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  idct16_rows_dspr2(input, outptr, 4);
926da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
  // Zero-fill the rest of `out`. Each asm statement stores one zero word
  // (two int16_t) at byte offsets 0, 32, ..., 480 from outptr, i.e. at the
  // same position in each of the 16 rows of `out` (one row is 16 int16_t =
  // 32 bytes). Starting at element 4 and stepping two elements per iteration
  // for 6 iterations clears elements 4..15 of every row.
  // NOTE(review): this relies on idct16_rows_dspr2 having filled elements
  // 0..3 of each row for the 4 input rows - confirm against its definition.
  //
  // The stores go through outptr, which is only an input operand, so the
  // "memory" clobber is required to tell the compiler that memory changes.
927da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  outptr += 4;
928da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (i = 0; i < 6; ++i) {
929da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
930da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,    0(%[outptr])     \n\t"
931da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,   32(%[outptr])     \n\t"
932da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,   64(%[outptr])     \n\t"
933da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,   96(%[outptr])     \n\t"
934da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  128(%[outptr])     \n\t"
935da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  160(%[outptr])     \n\t"
936da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  192(%[outptr])     \n\t"
937da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  224(%[outptr])     \n\t"
938da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  256(%[outptr])     \n\t"
939da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  288(%[outptr])     \n\t"
940da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  320(%[outptr])     \n\t"
941da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  352(%[outptr])     \n\t"
942da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  384(%[outptr])     \n\t"
943da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  416(%[outptr])     \n\t"
944da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  448(%[outptr])     \n\t"
945da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "sw     $zero,  480(%[outptr])     \n\t"
946da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
947da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        :
948da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [outptr] "r" (outptr)
        : "memory"
949da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
950da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
951da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    outptr += 2;
952da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
953da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
954da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // Then transform columns and add to dest
955da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  idct16_cols_add_blk_dspr2(out, dest, dest_stride);
956da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
957da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
958da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
959da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                               int dest_stride) {
960da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint32_t pos = 45;
961da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t out;
962da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t r;
963da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t a1, absa1;
964da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t vector_a1;
965da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t t1, t2, t3, t4;
966da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t vector_1, vector_2, vector_3, vector_4;
967da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
968da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  /* bit position for extract from acc */
969da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  __asm__ __volatile__ (
970da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    "wrdsp      %[pos],     1           \n\t"
971da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
972da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    :
973da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    : [pos] "r" (pos)
974da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  );
975da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
976da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]);
977da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  __asm__ __volatile__ (
978da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      "addi     %[out],     %[out],     32      \n\t"
979da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      "sra      %[a1],      %[out],     6       \n\t"
980da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
981da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      : [out] "+r" (out), [a1] "=r" (a1)
982da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      :
983da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  );
984da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
985da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  if (a1 < 0) {
986da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    /* use quad-byte
987da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian     * input and output memory are four byte aligned */
988da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
989da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "abs        %[absa1],       %[a1]       \n\t"
990da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "replv.qb   %[vector_a1],   %[absa1]    \n\t"
991da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
992da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
993da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [a1] "r" (a1)
994da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
995da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
996da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (r = 16; r--;) {
997da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      __asm__ __volatile__ (
998da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t1],          0(%[dest])                      \n\t"
999da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t2],          4(%[dest])                      \n\t"
1000da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t3],          8(%[dest])                      \n\t"
1001da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t4],          12(%[dest])                     \n\t"
1002da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "subu_s.qb      %[vector_1],    %[t1],          %[vector_a1]    \n\t"
1003da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "subu_s.qb      %[vector_2],    %[t2],          %[vector_a1]    \n\t"
1004da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "subu_s.qb      %[vector_3],    %[t3],          %[vector_a1]    \n\t"
1005da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "subu_s.qb      %[vector_4],    %[t4],          %[vector_a1]    \n\t"
1006da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_1],    0(%[dest])                      \n\t"
1007da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_2],    4(%[dest])                      \n\t"
1008da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_3],    8(%[dest])                      \n\t"
1009da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_4],    12(%[dest])                     \n\t"
1010da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "add            %[dest],        %[dest],        %[dest_stride]  \n\t"
1011da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1012da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1013da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian            [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1014da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian            [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1015da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian            [dest] "+&r" (dest)
1016da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1017da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      );
1018da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
1019da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else {
1020da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    /* use quad-byte
1021da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian     * input and output memory are four byte aligned */
1022da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    __asm__ __volatile__ (
1023da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        "replv.qb   %[vector_a1],   %[a1]   \n\t"
1024da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1025da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [vector_a1] "=r" (vector_a1)
1026da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        : [a1] "r" (a1)
1027da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    );
1028da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1029da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (r = 16; r--;) {
1030da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      __asm__ __volatile__ (
1031da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t1],          0(%[dest])                      \n\t"
1032da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t2],          4(%[dest])                      \n\t"
1033da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t3],          8(%[dest])                      \n\t"
1034da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "lw             %[t4],          12(%[dest])                     \n\t"
1035da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "addu_s.qb      %[vector_1],    %[t1],          %[vector_a1]    \n\t"
1036da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "addu_s.qb      %[vector_2],    %[t2],          %[vector_a1]    \n\t"
1037da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "addu_s.qb      %[vector_3],    %[t3],          %[vector_a1]    \n\t"
1038da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "addu_s.qb      %[vector_4],    %[t4],          %[vector_a1]    \n\t"
1039da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_1],    0(%[dest])                      \n\t"
1040da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_2],    4(%[dest])                      \n\t"
1041da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_3],    8(%[dest])                      \n\t"
1042da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "sw             %[vector_4],    12(%[dest])                     \n\t"
1043da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          "add            %[dest],        %[dest],        %[dest_stride]  \n\t"
1044da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1045da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1046da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian            [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1047da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian            [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1048da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian            [dest] "+&r" (dest)
1049da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1050da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      );
1051da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
1052da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
1053da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
1054da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1055da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid iadst16_dspr2(const int16_t *input, int16_t *output) {
1056da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
1057da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1058da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x0 = input[15];
1059da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x1 = input[0];
1060da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x2 = input[13];
1061da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x3 = input[2];
1062da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x4 = input[11];
1063da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x5 = input[4];
1064da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x6 = input[9];
1065da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x7 = input[6];
1066da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x8 = input[7];
1067da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x9 = input[8];
1068da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x10 = input[5];
1069da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x11 = input[10];
1070da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x12 = input[3];
1071da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x13 = input[12];
1072da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x14 = input[1];
1073da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int x15 = input[14];
1074da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1075da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8
1076da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian           | x9 | x10 | x11 | x12 | x13 | x14 | x15)) {
1077da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    output[0] = output[1] = output[2] = output[3] = output[4]
1078da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian              = output[5] = output[6] = output[7] = output[8]
1079da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian              = output[9] = output[10] = output[11] = output[12]
1080da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian              = output[13] = output[14] = output[15] = 0;
1081da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    return;
1082da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
1083da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1084da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // stage 1
1085da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s0 = x0 * cospi_1_64  + x1 * cospi_31_64;
1086da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
1087da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s2 = x2 * cospi_5_64  + x3 * cospi_27_64;
1088da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
1089da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s4 = x4 * cospi_9_64  + x5 * cospi_23_64;
1090da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
1091da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
1092da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
1093da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
1094da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
1095da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
1096da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
1097da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
1098da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s13 = x12 * cospi_7_64  - x13 * cospi_25_64;
1099da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
1100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s15 = x14 * cospi_3_64  - x15 * cospi_29_64;
1101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x0 = dct_const_round_shift(s0 + s8);
1103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x1 = dct_const_round_shift(s1 + s9);
1104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x2 = dct_const_round_shift(s2 + s10);
1105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x3 = dct_const_round_shift(s3 + s11);
1106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x4 = dct_const_round_shift(s4 + s12);
1107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x5 = dct_const_round_shift(s5 + s13);
1108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x6 = dct_const_round_shift(s6 + s14);
1109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x7 = dct_const_round_shift(s7 + s15);
1110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x8  = dct_const_round_shift(s0 - s8);
1111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x9  = dct_const_round_shift(s1 - s9);
1112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x10 = dct_const_round_shift(s2 - s10);
1113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x11 = dct_const_round_shift(s3 - s11);
1114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x12 = dct_const_round_shift(s4 - s12);
1115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x13 = dct_const_round_shift(s5 - s13);
1116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x14 = dct_const_round_shift(s6 - s14);
1117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x15 = dct_const_round_shift(s7 - s15);
1118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // stage 2
1120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s0 = x0;
1121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s1 = x1;
1122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s2 = x2;
1123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s3 = x3;
1124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s4 = x4;
1125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s5 = x5;
1126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s6 = x6;
1127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s7 = x7;
1128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s8 =    x8 * cospi_4_64   + x9 * cospi_28_64;
1129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s9 =    x8 * cospi_28_64  - x9 * cospi_4_64;
1130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s10 =   x10 * cospi_20_64 + x11 * cospi_12_64;
1131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s11 =   x10 * cospi_12_64 - x11 * cospi_20_64;
1132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
1133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s13 =   x12 * cospi_4_64  + x13 * cospi_28_64;
1134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
1135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s15 =   x14 * cospi_20_64 + x15 * cospi_12_64;
1136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x0 = s0 + s4;
1138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x1 = s1 + s5;
1139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x2 = s2 + s6;
1140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x3 = s3 + s7;
1141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x4 = s0 - s4;
1142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x5 = s1 - s5;
1143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x6 = s2 - s6;
1144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x7 = s3 - s7;
1145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x8 = dct_const_round_shift(s8 + s12);
1146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x9 = dct_const_round_shift(s9 + s13);
1147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x10 = dct_const_round_shift(s10 + s14);
1148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x11 = dct_const_round_shift(s11 + s15);
1149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x12 = dct_const_round_shift(s8 - s12);
1150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x13 = dct_const_round_shift(s9 - s13);
1151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x14 = dct_const_round_shift(s10 - s14);
1152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x15 = dct_const_round_shift(s11 - s15);
1153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // stage 3
1155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s0 = x0;
1156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s1 = x1;
1157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s2 = x2;
1158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s3 = x3;
1159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s4 = x4 * cospi_8_64  + x5 * cospi_24_64;
1160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
1161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
1162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s7 =   x6 * cospi_8_64  + x7 * cospi_24_64;
1163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s8 = x8;
1164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s9 = x9;
1165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s10 = x10;
1166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s11 = x11;
1167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s12 = x12 * cospi_8_64  + x13 * cospi_24_64;
1168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
1169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
1170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s15 =   x14 * cospi_8_64  + x15 * cospi_24_64;
1171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x0 = s0 + s2;
1173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x1 = s1 + s3;
1174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x2 = s0 - s2;
1175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x3 = s1 - s3;
1176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x4 = dct_const_round_shift(s4 + s6);
1177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x5 = dct_const_round_shift(s5 + s7);
1178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x6 = dct_const_round_shift(s4 - s6);
1179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x7 = dct_const_round_shift(s5 - s7);
1180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x8 = s8 + s10;
1181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x9 = s9 + s11;
1182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x10 = s8 - s10;
1183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x11 = s9 - s11;
1184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x12 = dct_const_round_shift(s12 + s14);
1185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x13 = dct_const_round_shift(s13 + s15);
1186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x14 = dct_const_round_shift(s12 - s14);
1187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x15 = dct_const_round_shift(s13 - s15);
1188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  // stage 4
1190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s2 = (- cospi_16_64) * (x2 + x3);
1191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s3 = cospi_16_64 * (x2 - x3);
1192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s6 = cospi_16_64 * (x6 + x7);
1193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s7 = cospi_16_64 * (- x6 + x7);
1194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s10 = cospi_16_64 * (x10 + x11);
1195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s11 = cospi_16_64 * (- x10 + x11);
1196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s14 = (- cospi_16_64) * (x14 + x15);
1197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  s15 = cospi_16_64 * (x14 - x15);
1198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x2 = dct_const_round_shift(s2);
1200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x3 = dct_const_round_shift(s3);
1201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x6 = dct_const_round_shift(s6);
1202da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x7 = dct_const_round_shift(s7);
1203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x10 = dct_const_round_shift(s10);
1204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x11 = dct_const_round_shift(s11);
1205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x14 = dct_const_round_shift(s14);
1206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  x15 = dct_const_round_shift(s15);
1207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[0] =  x0;
1209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[1] = -x8;
1210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[2] =  x12;
1211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[3] = -x4;
1212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[4] =  x6;
1213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[5] =  x14;
1214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[6] =  x10;
1215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[7] =  x2;
1216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[8] =  x3;
1217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[9] =  x11;
1218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[10] =  x15;
1219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[11] =  x7;
1220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[12] =  x5;
1221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[13] = -x13;
1222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[14] =  x9;
1223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  output[15] = -x1;
1224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
1225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#endif  // HAVE_DSPR2
1228