1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#include "vpx_ports/config.h" 13#include "dequantize.h" 14#include "idct.h" 15#include "vpx_mem/vpx_mem.h" 16 17DECLARE_ALIGNED(8, const unsigned char, cma[512]) = { 180, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2312, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 2433, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 2554, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 2675, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 2796, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 28114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 29131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 30148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 31165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 32182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 33199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 34216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 35233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 36250, 251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 37255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 38255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 39255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 40255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 41255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 42255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 43255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 44}; 45 46extern void vp8_short_idct4x4llm_mips(short *input, short *output, int pitch) ; 47extern void vp8_short_idct4x4llm_1_mips(short *input, short *output, int pitch); 48 49 50void vp8_dequant_idct_add_mips(short *input, short *dq, unsigned char *pred, 51 unsigned char *dest, int pitch, int stride) 52{ 53 short output[16]; 54 short *diff_ptr = output; 55 int i; 56 short a1, a2, a3, a0; 57 58 short input_temp[16]; 59 unsigned int in1, dq1, x1, in2, dq2; 60 const unsigned char *cm = &(cma[128]); 61 62 __asm__ __volatile__ ( 63 "lh %[in1], 0(%[input]) \n\t" 64 "lh %[dq1], 0(%[dq]) \n\t" 65 "lh %[in2], 2(%[input]) \n\t" 66 "lh %[dq2], 2(%[dq]) \n\t" 67 "append %[in1], %[in2], 16 \n\t" 68 "append %[dq1], %[dq2], 16 \n\t" 69 "mul.ph %[x1], %[dq1], %[in1] \n\t" 70 "sh %[x1], 2(%[input_temp]) \n\t" 71 "srl %[x1], %[x1], 16 \n\t" 72 "sh %[x1], 0(%[input_temp]) \n\t" 73 74 : [x1] "=&r" (x1), [in1] "=&r" (in1), [dq1] "=&r" (dq1), 75 [in2] "=&r" (in2), [dq2] "=&r" (dq2) 76 : [dq] "r" (dq), [input] "r" (input), 77 [input_temp] "r" (input_temp) 78 ); 79 80 for (i = 2; i < 16; i++) 81 { 82 input_temp[i] = dq[i] * input[i]; 83 } 84 85 /* the idct halves ( >> 1) the pitch */ 86 vp8_short_idct4x4llm_mips(input_temp, output, 4); 87 88 vpx_memset(input, 0, 32); 89 90 /* unroll the loop */ 91 for (i = 4; i--; ) 92 { 93 a0 = diff_ptr[0] + pred[0]; 94 a1 = diff_ptr[1] + pred[1]; 95 a2 = diff_ptr[2] + pred[2]; 96 a3 = diff_ptr[3] + pred[3]; 97 98 dest[0] = cm[a0]; 99 dest[1] = cm[a1]; 100 dest[2] = cm[a2]; 101 dest[3] = cm[a3]; 102 103 dest += stride; 104 diff_ptr += 4; 105 pred += pitch; 106 } 107} 108 109 110void vp8_dequant_dc_idct_add_mips(short *input, short *dq, unsigned char *pred, 111 unsigned char *dest, int pitch, int stride, 112 int Dc) 113{ 114 int i; 115 short output[16]; 116 short *diff_ptr = output; 117 short input_temp[16]; 118 short a1, a2, a3, a0; 119 120 unsigned int in1, dq1, x1, in2, dq2; 121 const unsigned char *cm = &(cma[128]); 122 123 input_temp[0] = (short)Dc; 124 125 __asm__ __volatile__ ( 126 "lh %[in1], 2(%[input]) \n\t" 127 "lh %[dq1], 2(%[dq]) \n\t" 128 "lh %[in2], 4(%[input]) \n\t" 129 "lh %[dq2], 4(%[dq]) \n\t" 130 "append %[in1], %[in2], 16 \n\t" 131 "append %[dq1], %[dq2], 16 \n\t" 132 "mul.ph %[x1], %[dq1], %[in1] \n\t" 133 "sh %[x1], 4(%[input_temp]) \n\t" 134 "srl %[x1], %[x1], 16 \n\t" 135 "sh %[x1], 2(%[input_temp]) \n\t" 136 137 : [x1] "=&r" (x1), [in1] "=&r" (in1), [dq1] "=&r" (dq1), 138 [in2] "=&r" (in2), [dq2] "=&r" (dq2) 139 : [dq] "r" (dq), [input] "r" (input), 140 [input_temp] "r" (input_temp) 141 ); 142 143 for (i = 3; i < 16; i++) 144 { 145 input_temp[i] = dq[i] * input[i]; 146 } 147 148 vp8_short_idct4x4llm_mips(input_temp, output, 4); 149 150 vpx_memset(input, 0, 32); 151 152 for (i = 4; i--; ) 153 { 154 a0 = diff_ptr[0] + pred[0]; 155 a1 = diff_ptr[1] + pred[1]; 156 a2 = diff_ptr[2] + pred[2]; 157 a3 = diff_ptr[3] + pred[3]; 158 159 dest[0] = cm[a0]; 160 dest[1] = cm[a1]; 161 dest[2] = cm[a2]; 162 dest[3] = cm[a3]; 163 164 dest += stride; 165 diff_ptr += 4; 166 pred += pitch; 167 } 168}