/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/block.h"

/*
 * Block error for one 4x4 block (16 coefficients), MIPS MSA version.
 *
 * Accumulates the squared difference between the original transform
 * coefficients and the dequantized coefficients, 8 halfwords per
 * iteration (2 iterations x 8 = 16 coefficients).
 *
 * coeff_ptr    - 16 original int16 transform coefficients.
 * dq_coeff_ptr - 16 dequantized int16 coefficients.
 * Returns the sum of squared differences, truncated to int32_t.
 *
 * NOTE(review): the LD_SH / ILVRL_H2_SH / HSUB_UH2_SW / DPADD_SD2_SD
 * macros come from vp8_macros_msa.h (not visible here); from their use,
 * they appear to load 8 halfwords, interleave coeff with dq_coeff,
 * subtract pairwise into two v4i32 difference vectors, and dot-product
 * accumulate diff*diff into 64-bit lanes — confirm against the header.
 */
int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr)
{
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, dq_coeff, coeff0, coeff1;
    v4i32 diff0, diff1;
    /* Two 2x64-bit running accumulators; four 64-bit lanes total. */
    v2i64 err0 = { 0 };
    v2i64 err1 = { 0 };

    /* Runs exactly twice (loop_cnt = 1, then 0), covering 16 coeffs. */
    for (loop_cnt = 2; loop_cnt--;)
    {
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
    }

    /* Horizontal reduction: fold lane 1 into lane 0 of each
     * accumulator, then extract and sum the two scalars. */
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err = __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    return err;
}

/*
 * Total error over the 16 luma 4x4 blocks of a macroblock, MSA version.
 *
 * mb - macroblock holding per-block coeff (mb->block[i].coeff) and
 *      dequantized coeff (mb->e_mbd.block[i].dqcoeff) arrays.
 * dc - when 1, the DC coefficient (element 0 of each block) is
 *      excluded from the error; when 0, all 16 coefficients count.
 * Returns the accumulated squared error, truncated to int32_t.
 *
 * The loop processes two 4x4 blocks (2 * 16 coefficients) per
 * iteration, 8 iterations = 16 luma blocks.
 */
int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc)
{
    BLOCK *be;
    BLOCKD *bd;
    int16_t *coeff_ptr, *dq_coeff_ptr;
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
    v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
    v4i32 diff0, diff1;
    v2i64 err0, err1;
    v16u8 zero = { 0 };
    /* All-ones byte mask: by default every difference is kept. */
    v16u8 mask0 = (v16u8)__msa_ldi_b(255);

    if (1 == dc)
    {
        /* Clear word 0 of the mask so the DC difference of each block
         * is zeroed out by __msa_bmnz_v below and does not contribute
         * to the error. */
        mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
    }

    for (loop_cnt = 0; loop_cnt < 8; loop_cnt++)
    {
        /* Load all 16 coefficients of the even-indexed block ... */
        be = &mb->block[2 * loop_cnt];
        bd = &mb->e_mbd.block[2 * loop_cnt];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff2 = LD_SH(coeff_ptr);
        dq_coeff2 = LD_SH(dq_coeff_ptr);
        /* ... and of the following odd-indexed block. */
        be = &mb->block[2 * loop_cnt + 1];
        bd = &mb->e_mbd.block[2 * loop_cnt + 1];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff3 = LD_SH(coeff_ptr);
        dq_coeff3 = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff4 = LD_SH(coeff_ptr);
        dq_coeff4 = LD_SH(dq_coeff_ptr);
        /* First block: the DC lane sits in diff0 word 0, so the mask
         * is applied only to diff0 of the first 8 coefficients. */
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
        /* DOTP starts fresh accumulators; DPADD then adds the second
         * half of the block into them. */
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        /* Reduce the four 64-bit lanes to a scalar and accumulate. */
        err0 += __msa_splati_d(err0, 1);
        err1 += __msa_splati_d(err1, 1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);

        /* Second (odd) block: same pattern, same DC masking. */
        ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err0 += __msa_splati_d(err0, 1);
        err1 += __msa_splati_d(err1, 1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);
    }

    return err;
}

/*
 * Total error over the chroma (U and V) blocks of a macroblock,
 * MSA version.
 *
 * Iterates blocks 16..23 (the chroma blocks in VP8's macroblock
 * layout — blocks 0..15 are luma), two 4x4 blocks per iteration,
 * and sums the squared coefficient/dequantized-coefficient
 * differences. No DC exclusion here, unlike vp8_mbblock_error_msa.
 *
 * mb - macroblock holding coeff and dqcoeff arrays per block.
 * Returns the accumulated squared error, truncated to int32_t.
 */
int32_t vp8_mbuverror_msa(MACROBLOCK *mb)
{
    BLOCK *be;
    BLOCKD *bd;
    int16_t *coeff_ptr, *dq_coeff_ptr;
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
    v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
    v4i32 diff0, diff1;
    v2i64 err0, err1, err_dup0, err_dup1;

    for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2)
    {
        /* Load the 16 coefficients of block loop_cnt ... */
        be = &mb->block[loop_cnt];
        bd = &mb->e_mbd.block[loop_cnt];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff2 = LD_SH(coeff_ptr);
        dq_coeff2 = LD_SH(dq_coeff_ptr);
        /* ... and of block loop_cnt + 1. */
        be = &mb->block[loop_cnt + 1];
        bd = &mb->e_mbd.block[loop_cnt + 1];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff3 = LD_SH(coeff_ptr);
        dq_coeff3 = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff4 = LD_SH(coeff_ptr);
        dq_coeff4 = LD_SH(dq_coeff_ptr);

        /* First block: DOTP initializes the accumulators, DPADD folds
         * in the second 8 coefficients. */
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);

        ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        /* Lane reduction via explicit duplicates + ADD2 (equivalent to
         * the in-place += reductions used in the functions above). */
        err_dup0 = __msa_splati_d(err0, 1);
        err_dup1 = __msa_splati_d(err1, 1);
        ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);

        /* Second block: same pattern. */
        ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err_dup0 = __msa_splati_d(err0, 1);
        err_dup1 = __msa_splati_d(err1, 1);
        ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);
    }

    return err;
}