/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vp8_rtcd.h"
12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vp8/common/mips/msa/vp8_macros_msa.h"
13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vp8/encoder/block.h"
14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
157bc9febe8749e98a3812a0dc4380ceae75c29450Johannint32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr) {
167bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int32_t err = 0;
177bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t loop_cnt;
187bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v8i16 coeff, dq_coeff, coeff0, coeff1;
197bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v4i32 diff0, diff1;
207bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v2i64 err0 = { 0 };
217bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v2i64 err1 = { 0 };
227bc9febe8749e98a3812a0dc4380ceae75c29450Johann
237bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (loop_cnt = 2; loop_cnt--;) {
247bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff = LD_SH(coeff_ptr);
257bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff = LD_SH(dq_coeff_ptr);
267bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
277bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DPADD_SD2_SD(diff0, diff1, err0, err1);
297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr += 8;
307bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr += 8;
317bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
327bc9febe8749e98a3812a0dc4380ceae75c29450Johann
337bc9febe8749e98a3812a0dc4380ceae75c29450Johann  err0 += __msa_splati_d(err0, 1);
347bc9febe8749e98a3812a0dc4380ceae75c29450Johann  err1 += __msa_splati_d(err1, 1);
357bc9febe8749e98a3812a0dc4380ceae75c29450Johann  err = __msa_copy_s_d(err0, 0);
367bc9febe8749e98a3812a0dc4380ceae75c29450Johann  err += __msa_copy_s_d(err1, 0);
377bc9febe8749e98a3812a0dc4380ceae75c29450Johann
387bc9febe8749e98a3812a0dc4380ceae75c29450Johann  return err;
397bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
417bc9febe8749e98a3812a0dc4380ceae75c29450Johannint32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc) {
427bc9febe8749e98a3812a0dc4380ceae75c29450Johann  BLOCK *be;
437bc9febe8749e98a3812a0dc4380ceae75c29450Johann  BLOCKD *bd;
447bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int16_t *coeff_ptr, *dq_coeff_ptr;
457bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int32_t err = 0;
467bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t loop_cnt;
477bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
487bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
497bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v4i32 diff0, diff1;
507bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v2i64 err0, err1;
517bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v16u8 zero = { 0 };
527bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v16u8 mask0 = (v16u8)__msa_ldi_b(255);
537bc9febe8749e98a3812a0dc4380ceae75c29450Johann
547bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if (1 == dc) {
557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
567bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
577bc9febe8749e98a3812a0dc4380ceae75c29450Johann
587bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (loop_cnt = 0; loop_cnt < 8; ++loop_cnt) {
597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    be = &mb->block[2 * loop_cnt];
607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bd = &mb->e_mbd.block[2 * loop_cnt];
617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr = be->coeff;
627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr = bd->dqcoeff;
637bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff = LD_SH(coeff_ptr);
647bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff = LD_SH(dq_coeff_ptr);
657bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr += 8;
667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr += 8;
677bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff2 = LD_SH(coeff_ptr);
687bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff2 = LD_SH(dq_coeff_ptr);
697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    be = &mb->block[2 * loop_cnt + 1];
707bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bd = &mb->e_mbd.block[2 * loop_cnt + 1];
717bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr = be->coeff;
727bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr = bd->dqcoeff;
737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff3 = LD_SH(coeff_ptr);
747bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff3 = LD_SH(dq_coeff_ptr);
757bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr += 8;
767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr += 8;
777bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff4 = LD_SH(coeff_ptr);
787bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff4 = LD_SH(dq_coeff_ptr);
797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
827bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
837bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
847bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DPADD_SD2_SD(diff0, diff1, err0, err1);
86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    err0 += __msa_splati_d(err0, 1);
87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    err1 += __msa_splati_d(err1, 1);
887bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err0, 0);
89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    err += __msa_copy_s_d(err1, 0);
90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
917bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
927bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
937bc9febe8749e98a3812a0dc4380ceae75c29450Johann    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
947bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
967bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DPADD_SD2_SD(diff0, diff1, err0, err1);
987bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err0 += __msa_splati_d(err0, 1);
997bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err1 += __msa_splati_d(err1, 1);
1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err0, 0);
1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err1, 0);
1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann  return err;
105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1077bc9febe8749e98a3812a0dc4380ceae75c29450Johannint32_t vp8_mbuverror_msa(MACROBLOCK *mb) {
1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann  BLOCK *be;
1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann  BLOCKD *bd;
1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int16_t *coeff_ptr, *dq_coeff_ptr;
1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int32_t err = 0;
1127bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t loop_cnt;
1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v4i32 diff0, diff1;
1167bc9febe8749e98a3812a0dc4380ceae75c29450Johann  v2i64 err0, err1, err_dup0, err_dup1;
1177bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1187bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2) {
1197bc9febe8749e98a3812a0dc4380ceae75c29450Johann    be = &mb->block[loop_cnt];
1207bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bd = &mb->e_mbd.block[loop_cnt];
1217bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr = be->coeff;
1227bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr = bd->dqcoeff;
1237bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff = LD_SH(coeff_ptr);
1247bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff = LD_SH(dq_coeff_ptr);
1257bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr += 8;
1267bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr += 8;
1277bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff2 = LD_SH(coeff_ptr);
1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff2 = LD_SH(dq_coeff_ptr);
1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    be = &mb->block[loop_cnt + 1];
1307bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bd = &mb->e_mbd.block[loop_cnt + 1];
1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr = be->coeff;
1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr = bd->dqcoeff;
1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff3 = LD_SH(coeff_ptr);
1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff3 = LD_SH(dq_coeff_ptr);
1357bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff_ptr += 8;
1367bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff_ptr += 8;
1377bc9febe8749e98a3812a0dc4380ceae75c29450Johann    coeff4 = LD_SH(coeff_ptr);
1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dq_coeff4 = LD_SH(dq_coeff_ptr);
1397bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DPADD_SD2_SD(diff0, diff1, err0, err1);
1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err_dup0 = __msa_splati_d(err0, 1);
1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err_dup1 = __msa_splati_d(err1, 1);
1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err0, 0);
1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err1, 0);
1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
1557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
1567bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
1577bc9febe8749e98a3812a0dc4380ceae75c29450Johann    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    DPADD_SD2_SD(diff0, diff1, err0, err1);
1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err_dup0 = __msa_splati_d(err0, 1);
1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err_dup1 = __msa_splati_d(err1, 1);
1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
1627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err0, 0);
1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann    err += __msa_copy_s_d(err1, 0);
1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann  return err;
167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
168