1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vp8_rtcd.h" 12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vp8/common/mips/msa/vp8_macros_msa.h" 13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vp8/encoder/block.h" 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 157bc9febe8749e98a3812a0dc4380ceae75c29450Johannint32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr) { 167bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t err = 0; 177bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t loop_cnt; 187bc9febe8749e98a3812a0dc4380ceae75c29450Johann v8i16 coeff, dq_coeff, coeff0, coeff1; 197bc9febe8749e98a3812a0dc4380ceae75c29450Johann v4i32 diff0, diff1; 207bc9febe8749e98a3812a0dc4380ceae75c29450Johann v2i64 err0 = { 0 }; 217bc9febe8749e98a3812a0dc4380ceae75c29450Johann v2i64 err1 = { 0 }; 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (loop_cnt = 2; loop_cnt--;) { 247bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff = LD_SH(coeff_ptr); 257bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff = LD_SH(dq_coeff_ptr); 267bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1); 277bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 287bc9febe8749e98a3812a0dc4380ceae75c29450Johann DPADD_SD2_SD(diff0, diff1, err0, err1); 297bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr += 8; 307bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr += 8; 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 327bc9febe8749e98a3812a0dc4380ceae75c29450Johann 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann err0 += __msa_splati_d(err0, 1); 347bc9febe8749e98a3812a0dc4380ceae75c29450Johann err1 += __msa_splati_d(err1, 1); 357bc9febe8749e98a3812a0dc4380ceae75c29450Johann err = __msa_copy_s_d(err0, 0); 367bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err1, 0); 377bc9febe8749e98a3812a0dc4380ceae75c29450Johann 387bc9febe8749e98a3812a0dc4380ceae75c29450Johann return err; 397bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 417bc9febe8749e98a3812a0dc4380ceae75c29450Johannint32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc) { 427bc9febe8749e98a3812a0dc4380ceae75c29450Johann BLOCK *be; 437bc9febe8749e98a3812a0dc4380ceae75c29450Johann BLOCKD *bd; 447bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16_t *coeff_ptr, *dq_coeff_ptr; 457bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t err = 0; 467bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t loop_cnt; 477bc9febe8749e98a3812a0dc4380ceae75c29450Johann v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4; 487bc9febe8749e98a3812a0dc4380ceae75c29450Johann v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4; 497bc9febe8749e98a3812a0dc4380ceae75c29450Johann v4i32 diff0, diff1; 507bc9febe8749e98a3812a0dc4380ceae75c29450Johann v2i64 err0, err1; 517bc9febe8749e98a3812a0dc4380ceae75c29450Johann v16u8 zero = { 0 }; 527bc9febe8749e98a3812a0dc4380ceae75c29450Johann v16u8 mask0 = (v16u8)__msa_ldi_b(255); 537bc9febe8749e98a3812a0dc4380ceae75c29450Johann 547bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (1 == dc) { 557bc9febe8749e98a3812a0dc4380ceae75c29450Johann mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero); 567bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 577bc9febe8749e98a3812a0dc4380ceae75c29450Johann 587bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (loop_cnt = 0; loop_cnt < 8; ++loop_cnt) { 597bc9febe8749e98a3812a0dc4380ceae75c29450Johann be = &mb->block[2 * loop_cnt]; 607bc9febe8749e98a3812a0dc4380ceae75c29450Johann bd = &mb->e_mbd.block[2 * loop_cnt]; 617bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr = be->coeff; 627bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr = bd->dqcoeff; 637bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff = LD_SH(coeff_ptr); 647bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff = LD_SH(dq_coeff_ptr); 657bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr += 8; 667bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr += 8; 677bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff2 = LD_SH(coeff_ptr); 687bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff2 = LD_SH(dq_coeff_ptr); 697bc9febe8749e98a3812a0dc4380ceae75c29450Johann be = &mb->block[2 * loop_cnt + 1]; 707bc9febe8749e98a3812a0dc4380ceae75c29450Johann bd = &mb->e_mbd.block[2 * loop_cnt + 1]; 717bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr = be->coeff; 727bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr = bd->dqcoeff; 737bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff3 = LD_SH(coeff_ptr); 747bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff3 = LD_SH(dq_coeff_ptr); 757bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr += 8; 767bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr += 8; 777bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff4 = LD_SH(coeff_ptr); 787bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff4 = LD_SH(dq_coeff_ptr); 797bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1); 807bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 817bc9febe8749e98a3812a0dc4380ceae75c29450Johann diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0); 827bc9febe8749e98a3812a0dc4380ceae75c29450Johann DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); 837bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1); 847bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 857bc9febe8749e98a3812a0dc4380ceae75c29450Johann DPADD_SD2_SD(diff0, diff1, err0, err1); 86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian err0 += __msa_splati_d(err0, 1); 87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian err1 += __msa_splati_d(err1, 1); 887bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err0, 0); 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian err += __msa_copy_s_d(err1, 0); 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 917bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1); 927bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 937bc9febe8749e98a3812a0dc4380ceae75c29450Johann diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0); 947bc9febe8749e98a3812a0dc4380ceae75c29450Johann DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); 957bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1); 967bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 977bc9febe8749e98a3812a0dc4380ceae75c29450Johann DPADD_SD2_SD(diff0, diff1, err0, err1); 987bc9febe8749e98a3812a0dc4380ceae75c29450Johann err0 += __msa_splati_d(err0, 1); 997bc9febe8749e98a3812a0dc4380ceae75c29450Johann err1 += __msa_splati_d(err1, 1); 1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err0, 0); 1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err1, 0); 1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann return err; 105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1077bc9febe8749e98a3812a0dc4380ceae75c29450Johannint32_t vp8_mbuverror_msa(MACROBLOCK *mb) { 1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann BLOCK *be; 1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann BLOCKD *bd; 1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16_t *coeff_ptr, *dq_coeff_ptr; 1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t err = 0; 1127bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t loop_cnt; 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4; 1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4; 1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann v4i32 diff0, diff1; 1167bc9febe8749e98a3812a0dc4380ceae75c29450Johann v2i64 err0, err1, err_dup0, err_dup1; 1177bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1187bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2) { 1197bc9febe8749e98a3812a0dc4380ceae75c29450Johann be = &mb->block[loop_cnt]; 1207bc9febe8749e98a3812a0dc4380ceae75c29450Johann bd = &mb->e_mbd.block[loop_cnt]; 1217bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr = be->coeff; 1227bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr = bd->dqcoeff; 1237bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff = LD_SH(coeff_ptr); 1247bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff = LD_SH(dq_coeff_ptr); 1257bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr += 8; 1267bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr += 8; 1277bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff2 = LD_SH(coeff_ptr); 1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff2 = LD_SH(dq_coeff_ptr); 1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann be = &mb->block[loop_cnt + 1]; 1307bc9febe8749e98a3812a0dc4380ceae75c29450Johann bd = &mb->e_mbd.block[loop_cnt + 1]; 1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr = be->coeff; 1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr = bd->dqcoeff; 1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff3 = LD_SH(coeff_ptr); 1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff3 = LD_SH(dq_coeff_ptr); 1357bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff_ptr += 8; 1367bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff_ptr += 8; 1377bc9febe8749e98a3812a0dc4380ceae75c29450Johann coeff4 = LD_SH(coeff_ptr); 1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann dq_coeff4 = LD_SH(dq_coeff_ptr); 1397bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1); 1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); 1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1); 1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann DPADD_SD2_SD(diff0, diff1, err0, err1); 1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann err_dup0 = __msa_splati_d(err0, 1); 1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann err_dup1 = __msa_splati_d(err1, 1); 1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann ADD2(err0, err_dup0, err1, err_dup1, err0, err1); 1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err0, 0); 1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err1, 0); 1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1); 1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 1557bc9febe8749e98a3812a0dc4380ceae75c29450Johann DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); 1567bc9febe8749e98a3812a0dc4380ceae75c29450Johann ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1); 1577bc9febe8749e98a3812a0dc4380ceae75c29450Johann HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); 1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann DPADD_SD2_SD(diff0, diff1, err0, err1); 1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann err_dup0 = __msa_splati_d(err0, 1); 1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann err_dup1 = __msa_splati_d(err1, 1); 1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann ADD2(err0, err_dup0, err1, err_dup1, err0, err1); 1627bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err0, 0); 1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann err += __msa_copy_s_d(err1, 0); 1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann return err; 167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 168