1/* 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vp9_rtcd.h" 12#include "vpx_dsp/mips/macros_msa.h" 13 14#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ 15static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \ 16 const int16_t *dq_coeff_ptr, \ 17 int64_t *ssz) { \ 18 int64_t err = 0; \ 19 uint32_t loop_cnt; \ 20 v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ 21 v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ 22 v2i64 sq_coeff_r, sq_coeff_l; \ 23 v2i64 err0, err_dup0, err1, err_dup1; \ 24 \ 25 coeff = LD_SH(coeff_ptr); \ 26 dq_coeff = LD_SH(dq_coeff_ptr); \ 27 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 28 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 29 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 30 DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \ 31 sq_coeff_r, sq_coeff_l); \ 32 DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ 33 \ 34 coeff = LD_SH(coeff_ptr + 8); \ 35 dq_coeff = LD_SH(dq_coeff_ptr + 8); \ 36 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 37 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 38 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 39 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ 40 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ 41 \ 42 coeff_ptr += 16; \ 43 dq_coeff_ptr += 16; \ 44 \ 45 for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ 46 coeff = LD_SH(coeff_ptr); \ 47 dq_coeff = LD_SH(dq_coeff_ptr); \ 48 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 49 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 50 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 51 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ 52 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ 53 \ 54 coeff = LD_SH(coeff_ptr + 8); \ 55 dq_coeff = LD_SH(dq_coeff_ptr + 8); \ 56 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ 57 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ 58 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ 59 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ 60 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ 61 \ 62 coeff_ptr += 16; \ 63 dq_coeff_ptr += 16; \ 64 } \ 65 \ 66 err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ 67 err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ 68 sq_coeff_r += err_dup0; \ 69 sq_coeff_l += err_dup1; \ 70 *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ 71 *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ 72 \ 73 err_dup0 = __msa_splati_d(err0, 1); \ 74 err_dup1 = __msa_splati_d(err1, 1); \ 75 err0 += err_dup0; \ 76 err1 += err_dup1; \ 77 err = __msa_copy_s_d(err0, 0); \ 78 err += __msa_copy_s_d(err1, 0); \ 79 \ 80 return err; \ 81} 82 83BLOCK_ERROR_BLOCKSIZE_MSA(16); 84BLOCK_ERROR_BLOCKSIZE_MSA(64); 85BLOCK_ERROR_BLOCKSIZE_MSA(256); 86BLOCK_ERROR_BLOCKSIZE_MSA(1024); 87 88int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr, 89 const tran_low_t *dq_coeff_ptr, 90 intptr_t blk_size, int64_t *ssz) { 91 int64_t err; 92 const int16_t *coeff = (const int16_t *)coeff_ptr; 93 const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr; 94 95 switch (blk_size) { 96 case 16: 97 err = block_error_16size_msa(coeff, dq_coeff, ssz); 98 break; 99 case 64: 100 err = block_error_64size_msa(coeff, dq_coeff, ssz); 101 break; 102 case 256: 103 err = block_error_256size_msa(coeff, dq_coeff, ssz); 104 break; 105 case 1024: 106 err = block_error_1024size_msa(coeff, dq_coeff, ssz); 107 break; 108 default: 109 err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz); 110 break; 111 } 112 113 return err; 114} 115