/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/block.h"

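/* Returns the sum of squared differences between the 16 transform
 * coefficients of a 4x4 block and their dequantized counterparts.
 * Equivalent scalar computation (sketch):
 *     err = 0;
 *     for (i = 0; i < 16; i++)
 *     {
 *         diff = coeff_ptr[i] - dq_coeff_ptr[i];
 *         err += diff * diff;
 *     }
 */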
int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr)
{
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, dq_coeff, coeff0, coeff1;
    v4i32 diff0, diff1;
    v2i64 err0 = { 0 };
    v2i64 err1 = { 0 };

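    /* Two passes of 8 coefficients cover the whole block: interleave
     * coeff/dqcoeff, form the pairwise differences, and accumulate their
     * squares into the 64-bit lanes of err0/err1. */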
    for (loop_cnt = 2; loop_cnt--;)
    {
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
    }

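    /* Fold the two 64-bit lanes of each accumulator and combine them into
     * the scalar error. */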
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err = __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    return err;
}

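/* Sum of squared coefficient differences over the 16 luma blocks of a
 * macroblock. When dc == 1, the DC coefficient (index 0) of every block is
 * excluded from the error. */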
int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc)
{
    BLOCK *be;
    BLOCKD *bd;
    int16_t *coeff_ptr, *dq_coeff_ptr;
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
    v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
    v4i32 diff0, diff1;
    v2i64 err0, err1;
    v16u8 zero  = { 0 };
    v16u8 mask0 = (v16u8)__msa_ldi_b(255);

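    /* When dc == 1, clear the first word lane of the mask so that each
     * block's DC difference is zeroed before it is squared and
     * accumulated. */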
    if (1 == dc)
    {
        mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
    }

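    /* Process the 16 luma blocks as 8 pairs of adjacent blocks. */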
    for (loop_cnt = 0; loop_cnt < 8; loop_cnt++)
    {
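        /* Load the coefficients and dequantized coefficients (16 values
         * each) of the even and the odd block of the pair. */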
        be = &mb->block[2 * loop_cnt];
        bd = &mb->e_mbd.block[2 * loop_cnt];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff2 = LD_SH(coeff_ptr);
        dq_coeff2 = LD_SH(dq_coeff_ptr);
        be = &mb->block[2 * loop_cnt + 1];
        bd = &mb->e_mbd.block[2 * loop_cnt + 1];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff3 = LD_SH(coeff_ptr);
        dq_coeff3 = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff4 = LD_SH(coeff_ptr);
        dq_coeff4 = LD_SH(dq_coeff_ptr);
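        /* Even block of the pair: differences, optional DC masking, then
         * squared-difference accumulation and reduction into the scalar
         * err. */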
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err0 += __msa_splati_d(err0, 1);
        err1 += __msa_splati_d(err1, 1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);

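        /* Odd block of the pair, handled the same way. */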
        ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err0 += __msa_splati_d(err0, 1);
        err1 += __msa_splati_d(err1, 1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);
    }

    return err;
}

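/* Sum of squared coefficient differences over the eight 4x4 chroma blocks
 * (block indices 16 to 23) of a macroblock. */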
int32_t vp8_mbuverror_msa(MACROBLOCK *mb)
{
    BLOCK *be;
    BLOCKD *bd;
    int16_t *coeff_ptr, *dq_coeff_ptr;
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
    v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
    v4i32 diff0, diff1;
    v2i64 err0, err1, err_dup0, err_dup1;

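    /* Process the chroma blocks two at a time, loading the coefficients
     * and dequantized coefficients (16 values each) of both blocks. */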
    for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2)
    {
        be = &mb->block[loop_cnt];
        bd = &mb->e_mbd.block[loop_cnt];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff2 = LD_SH(coeff_ptr);
        dq_coeff2 = LD_SH(dq_coeff_ptr);
        be = &mb->block[loop_cnt + 1];
        bd = &mb->e_mbd.block[loop_cnt + 1];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff3 = LD_SH(coeff_ptr);
        dq_coeff3 = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff4 = LD_SH(coeff_ptr);
        dq_coeff4 = LD_SH(dq_coeff_ptr);

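        /* First block of the pair: differences, squared-difference
         * accumulation and reduction into the scalar err. */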
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);

        ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err_dup0 = __msa_splati_d(err0, 1);
        err_dup1 = __msa_splati_d(err1, 1);
        ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);

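        /* Second block of the pair, handled the same way. */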
        ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err_dup0 = __msa_splati_d(err0, 1);
        err_dup1 = __msa_splati_d(err1, 1);
        ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);
    }

    return err;
}