1/* 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vp9_rtcd.h" 12#include "vpx_dsp/mips/macros_msa.h" 13 14#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ 15 static int64_t block_error_##BSize##size_msa( \ 16 const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \ 17 int64_t err = 0; \ 18 uint32_t loop_cnt; \ 19 v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ 20 v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ 21 v2i64 sq_coeff_r, sq_coeff_l; \ 22 v2i64 err0, err_dup0, err1, err_dup1; \ 23 \ 24 coeff = LD_SH(coeff_ptr); \ 25 dq_coeff = LD_SH(dq_coeff_ptr); \ 26 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 27 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 28 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 29 DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \ 30 sq_coeff_l); \ 31 DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ 32 \ 33 coeff = LD_SH(coeff_ptr + 8); \ 34 dq_coeff = LD_SH(dq_coeff_ptr + 8); \ 35 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 36 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 37 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 38 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ 39 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ 40 \ 41 coeff_ptr += 16; \ 42 dq_coeff_ptr += 16; \ 43 \ 44 for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ 45 coeff = LD_SH(coeff_ptr); \ 46 dq_coeff = LD_SH(dq_coeff_ptr); \ 47 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 48 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 49 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 50 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ 51 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ 52 \ 53 coeff = LD_SH(coeff_ptr + 8); \ 54 dq_coeff = LD_SH(dq_coeff_ptr + 8); \ 55 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 56 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 57 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 58 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ 59 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ 60 \ 61 coeff_ptr += 16; \ 62 dq_coeff_ptr += 16; \ 63 } \ 64 \ 65 err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ 66 err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ 67 sq_coeff_r += err_dup0; \ 68 sq_coeff_l += err_dup1; \ 69 *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ 70 *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ 71 \ 72 err_dup0 = __msa_splati_d(err0, 1); \ 73 err_dup1 = __msa_splati_d(err1, 1); \ 74 err0 += err_dup0; \ 75 err1 += err_dup1; \ 76 err = __msa_copy_s_d(err0, 0); \ 77 err += __msa_copy_s_d(err1, 0); \ 78 \ 79 return err; \ 80 } 81 82BLOCK_ERROR_BLOCKSIZE_MSA(16); 83BLOCK_ERROR_BLOCKSIZE_MSA(64); 84BLOCK_ERROR_BLOCKSIZE_MSA(256); 85BLOCK_ERROR_BLOCKSIZE_MSA(1024); 86 87int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr, 88 const tran_low_t *dq_coeff_ptr, intptr_t blk_size, 89 int64_t *ssz) { 90 int64_t err; 91 const int16_t *coeff = (const int16_t *)coeff_ptr; 92 const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr; 93 94 switch (blk_size) { 95 case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break; 96 case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break; 97 case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break; 98 case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break; 99 default: 100 err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz); 101 break; 102 } 103 104 return err; 105} 106