/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/block.h"

/* Fast (no dead-zone) quantizer for one 4x4 block of DCT coefficients.
 * Computes qcoeff = sign(z) * (((|z| + round) * quant) >> 16) and
 * dqcoeff = qcoeff * dequant, and returns the end-of-block count (index of
 * the last nonzero coefficient in zig-zag order, plus one). */
static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *zbin,
                                  int16_t *round, int16_t *quant,
                                  int16_t *de_quant, int16_t *q_coeff,
                                  int16_t *dq_coeff)
{
    int32_t cnt, eob;
    v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12,
                          3, 8, 11, 13, 9, 10, 14, 15 };
    v8i16 round0, round1;
    v8i16 sign_z0, sign_z1;
    v8i16 q_coeff0, q_coeff1;
    v8i16 x0, x1, de_quant0, de_quant1;
    v8i16 coeff0, coeff1, z0, z1;
    v8i16 quant0, quant1, quant2, quant3;
    v8i16 zero = { 0 };
    v8i16 inv_zig_zag0, inv_zig_zag1;
    v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
    v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
    v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
    v4i32 temp0_w, temp1_w, temp2_w, temp3_w;

    (void)zbin; /* The fast path does not use the zig-zag bin. */

    ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
    eob = -1;
    /* Gather coefficients, rounding and quantizer values in zig-zag order. */
    LD_SH2(coeff_ptr, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               z0, z1);
    LD_SH2(round, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               round0, round1);
    LD_SH2(quant, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               quant0, quant2);
    /* Extract sign masks and absolute values. */
    sign_z0 = z0 >> 15;
    sign_z1 = z1 >> 15;
    x0 = __msa_add_a_h(z0, zero);
    x1 = __msa_add_a_h(z1, zero);
    /* ((|z| + round) * quant) >> 16, accumulated in 32-bit precision. */
    ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
    ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
    ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
    ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
    DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
                quant3, temp0_w, temp1_w, temp2_w, temp3_w);
    SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
    PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
    /* Restore the signs. */
    x0 = x0 ^ sign_z0;
    x1 = x1 ^ sign_z1;
    SUB2(x0, sign_z0, x1, sign_z1, x0, x1);
    /* Scatter back to raster order; store quantized and dequantized values. */
    VSHF_H2_SH(x0, x1, x0, x1, inv_zig_zag0, inv_zig_zag1, q_coeff0, q_coeff1);
    ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
    LD_SH2(de_quant, 8, de_quant0, de_quant1);
    q_coeff0 *= de_quant0;
    q_coeff1 *= de_quant1;
    ST_SH2(q_coeff0, q_coeff1, dq_coeff, 8);

    /* Scan backwards in zig-zag order for the last nonzero coefficient. */
    for (cnt = 0; cnt < 16; ++cnt)
    {
        if ((cnt <= 7) && (x1[7 - cnt] != 0))
        {
            eob = (15 - cnt);
            break;
        }

        if ((cnt > 7) && (x0[7 - (cnt - 8)] != 0))
        {
            eob = (7 - (cnt - 8));
            break;
        }
    }

    return (int8_t)(eob + 1);
}

/* Regular quantizer: applies the zig-zag bin (dead zone), widened per run of
 * zeros by zbin_boost and by the zbin_oq offset, then quantizes in two
 * multiply stages (quant, then quant_shift), mirroring the scalar regular
 * quantizer step for step. */
static int8_t exact_regular_quantize_b_msa(int16_t *zbin_boost,
                                           int16_t *coeff_ptr,
                                           int16_t *zbin,
                                           int16_t *round,
                                           int16_t *quant,
                                           int16_t *quant_shift,
                                           int16_t *de_quant,
                                           int16_t zbin_oq_in,
                                           int16_t *q_coeff,
                                           int16_t *dq_coeff)
{
    int32_t cnt, eob;
    int16_t *boost_temp = zbin_boost;
    v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12,
                          3, 8, 11, 13, 9, 10, 14, 15 };
    v8i16 round0, round1;
    v8i16 sign_z0, sign_z1;
    v8i16 q_coeff0, q_coeff1;
    v8i16 z_bin0, z_bin1, zbin_o_q;
    v8i16 x0, x1, sign_x0, sign_x1, de_quant0, de_quant1;
    v8i16 coeff0, coeff1, z0, z1;
    v8i16 quant0, quant1, quant2, quant3;
    v8i16 zero = { 0 };
    v8i16 inv_zig_zag0, inv_zig_zag1;
    v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
    v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
    v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
    v4i32 temp0_w, temp1_w, temp2_w, temp3_w;

    ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
    zbin_o_q = __msa_fill_h(zbin_oq_in);
    eob = -1;
    /* Gather coefficients and per-coefficient tables in zig-zag order. */
    LD_SH2(coeff_ptr, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               z0, z1);
    LD_SH2(round, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               round0, round1);
    LD_SH2(quant, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               quant0, quant2);
    LD_SH2(zbin, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               z_bin0, z_bin1);
    /* Sign masks, absolute values and the dead-zone margin
     * |z| - zbin - zbin_oq; the per-run zbin_boost part of the dead zone is
     * applied in the scalar loop below. */
    sign_z0 = z0 >> 15;
    sign_z1 = z1 >> 15;
    x0 = __msa_add_a_h(z0, zero);
    x1 = __msa_add_a_h(z1, zero);
    SUB2(x0, z_bin0, x1, z_bin1, z_bin0, z_bin1);
    SUB2(z_bin0, zbin_o_q, z_bin1, zbin_o_q, z_bin0, z_bin1);
    /* First stage: y = ((|z| + round) * quant) >> 16. */
    ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
    ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
    ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
    ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
    DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
                quant3, temp0_w, temp1_w, temp2_w, temp3_w);
    SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
    PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, temp0_h, temp2_h);
    /* Second stage: x = ((y + |z| + round) * quant_shift) >> 16. */
    LD_SH2(quant_shift, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               quant0, quant2);
    ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
    ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
    ADD2(x0, round0, x1, round1, x0, x1);
    ILVL_H2_SH(temp0_h, x0, temp2_h, x1, temp1_h, temp3_h);
    ILVR_H2_SH(temp0_h, x0, temp2_h, x1, temp0_h, temp2_h);
    DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
                quant3, temp0_w, temp1_w, temp2_w, temp3_w);
    SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
    PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
    /* Restore the signs. */
    sign_x0 = x0 ^ sign_z0;
    sign_x1 = x1 ^ sign_z1;
    SUB2(sign_x0, sign_z0, sign_x1, sign_z1, sign_x0, sign_x1);
    /* Apply the run-adaptive dead zone: a coefficient survives only if
     * |z| - zbin - zbin_oq >= *boost_temp. Each coefficient that quantizes
     * to zero advances the boost pointer (widening the bin); a surviving
     * nonzero coefficient resets it and updates the end of block. */
    for (cnt = 0; cnt < 16; ++cnt)
    {
        if (cnt <= 7)
        {
            if (boost_temp[0] <= z_bin0[cnt])
            {
                if (x0[cnt])
                {
                    eob = cnt;
                    boost_temp = zbin_boost;
                }
                else
                {
                    boost_temp++;
                }
            }
            else
            {
                sign_x0[cnt] = 0;
                boost_temp++;
            }
        }
        else
        {
            if (boost_temp[0] <= z_bin1[cnt - 8])
            {
                if (x1[cnt - 8])
                {
                    eob = cnt;
                    boost_temp = zbin_boost;
                }
                else
                {
                    boost_temp++;
                }
            }
            else
            {
                sign_x1[cnt - 8] = 0;
                boost_temp++;
            }
        }
    }

    /* Scatter back to raster order; store quantized and dequantized values. */
    VSHF_H2_SH(sign_x0, sign_x1, sign_x0, sign_x1, inv_zig_zag0, inv_zig_zag1,
               q_coeff0, q_coeff1);
    ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
    LD_SH2(de_quant, 8, de_quant0, de_quant1);
    MUL2(de_quant0, q_coeff0, de_quant1, q_coeff1, de_quant0, de_quant1);
    ST_SH2(de_quant0, de_quant1, dq_coeff, 8);

    return (int8_t)(eob + 1);
}

void vp8_fast_quantize_b_msa(BLOCK *b, BLOCKD *d)
{
    int16_t *coeff_ptr = b->coeff;
    int16_t *zbin_ptr = b->zbin;
    int16_t *round_ptr = b->round;
    int16_t *quant_ptr = b->quant_fast;
    int16_t *qcoeff_ptr = d->qcoeff;
    int16_t *dqcoeff_ptr = d->dqcoeff;
    int16_t *dequant_ptr = d->dequant;

    *d->eob = fast_quantize_b_msa(coeff_ptr, zbin_ptr, round_ptr, quant_ptr,
                                  dequant_ptr, qcoeff_ptr, dqcoeff_ptr);
}

void vp8_regular_quantize_b_msa(BLOCK *b, BLOCKD *d)
{
    int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
    int16_t *coeff_ptr = b->coeff;
    int16_t *zbin_ptr = b->zbin;
    int16_t *round_ptr = b->round;
    int16_t *quant_ptr = b->quant;
    int16_t *quant_shift_ptr = b->quant_shift;
    int16_t *qcoeff_ptr = d->qcoeff;
    int16_t *dqcoeff_ptr = d->dqcoeff;
    int16_t *dequant_ptr = d->dequant;
    int16_t zbin_oq_value = b->zbin_extra;

    *d->eob = exact_regular_quantize_b_msa(zbin_boost_ptr, coeff_ptr,
                                           zbin_ptr, round_ptr,
                                           quant_ptr, quant_shift_ptr,
                                           dequant_ptr, zbin_oq_value,
                                           qcoeff_ptr, dqcoeff_ptr);
}
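
/* For reference, a minimal scalar sketch of the arithmetic that
 * fast_quantize_b_msa vectorizes, using the same 4x4 zig-zag scan that the
 * zigzag_mask0/zigzag_mask1 shuffles encode. The function name is
 * illustrative only, and the block is kept out of the build; the generic C
 * implementation in vp8/encoder/quantize.c is the authoritative reference. */
#if 0
static int8_t fast_quantize_b_scalar_sketch(const int16_t *coeff_ptr,
                                            const int16_t *round,
                                            const int16_t *quant,
                                            const int16_t *de_quant,
                                            int16_t *q_coeff,
                                            int16_t *dq_coeff)
{
    static const int16_t zig_zag[16] = { 0, 1, 4, 8, 5, 2, 3, 6,
                                         9, 12, 13, 10, 7, 11, 14, 15 };
    int32_t i, rc, z, sign, x, y;
    int32_t eob = -1;

    for (i = 0; i < 16; ++i)
    {
        rc = zig_zag[i];
        z = coeff_ptr[rc];
        sign = z >> 31;                           /* 0 or -1 */
        x = (z ^ sign) - sign;                    /* |z| */
        y = ((x + round[rc]) * quant[rc]) >> 16;  /* quantize */
        x = (y ^ sign) - sign;                    /* restore the sign */
        q_coeff[rc] = (int16_t)x;
        dq_coeff[rc] = (int16_t)(x * de_quant[rc]);

        if (y)
        {
            eob = i;                              /* last nonzero position */
        }
    }

    return (int8_t)(eob + 1);
}
#endif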