1b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard/* 2b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * Copyright 2016 The LibYuv Project Authors. All rights reserved. 3b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * 4b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * Use of this source code is governed by a BSD-style license 5b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * that can be found in the LICENSE file in the root of the source 6b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * tree. An additional intellectual property rights grant can be found 7b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * in the file PATENTS. All contributing project authors may 8b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard * be found in the AUTHORS file in the root of the source tree. 9b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard */ 10b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 11b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#include <string.h> 12b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 13b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#include "libyuv/row.h" 14b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 15b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard// This module is for GCC MSA 16b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 17b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#include "libyuv/macros_msa.h" 18b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 19b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#ifdef __cplusplus 20b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardnamespace libyuv { 21b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardextern "C" { 22b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif 23b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 24b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#define ALPHA_VAL (-1) 
// Fill YUV -> RGB conversion constants into vectors.
//   yuvconst  pointer to the constants structure (fields kUVToB, kUVToR,
//             kUVToG, kUVBiasB, kUVBiasG, kUVBiasR, kYToRgb are read).
//   ub..yg    v4i32 lvalues; each receives one constant replicated into all
//             four 32-bit lanes.
#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) \
  { \
    ub = __msa_fill_w((yuvconst)->kUVToB[0]); \
    vr = __msa_fill_w((yuvconst)->kUVToR[1]); \
    ug = __msa_fill_w((yuvconst)->kUVToG[0]); \
    vg = __msa_fill_w((yuvconst)->kUVToG[1]); \
    bb = __msa_fill_w((yuvconst)->kUVBiasB[0]); \
    bg = __msa_fill_w((yuvconst)->kUVBiasG[0]); \
    br = __msa_fill_w((yuvconst)->kUVBiasR[0]); \
    yg = __msa_fill_w((yuvconst)->kYToRgb[0]); \
  }

// Load YUV 422 pixel data.
// Reads one 64-bit value from psrc_y and one 32-bit value from each of psrc_u
// and psrc_v, and places each in element 0 of an otherwise zero vector
// (out_y, out_u, out_v are v16u8 lvalues).
#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
  { \
    uint64 y_m; \
    uint32 u_m, v_m; \
    v4i32 zero_m = {0}; \
    y_m = LD(psrc_y); \
    u_m = LW(psrc_u); \
    v_m = LW(psrc_v); \
    out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64)y_m); \
    out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32)u_m); \
    out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32)v_m); \
  }

// Clip input vector elements between 0 to 255.
// All six arguments are v4i32 lvalues and are clamped in place: first raised
// to at least 0, then limited to at most 0xFF.
#define CLIP_0TO255(in0, in1, in2, in3, in4, in5) \
  { \
    v4i32 max_m = __msa_ldi_w(0xFF); \
    \
    in0 = __msa_maxi_s_w(in0, 0); \
    in1 = __msa_maxi_s_w(in1, 0); \
    in2 = __msa_maxi_s_w(in2, 0); \
    in3 = __msa_maxi_s_w(in3, 0); \
    in4 = __msa_maxi_s_w(in4, 0); \
    in5 = __msa_maxi_s_w(in5, 0); \
    in0 = __msa_min_s_w(max_m, in0); \
    in1 = __msa_min_s_w(max_m, in1); \
    in2 = __msa_min_s_w(max_m, in2); \
    in3 = __msa_min_s_w(max_m, in3); \
    in4 = __msa_min_s_w(max_m, in4); \
    in5 = __msa_min_s_w(max_m, in5); \
  }

// Convert 8 pixels of YUV 420 to RGB.
//   in_y        vector whose low 8 bytes are the Y samples.
//   in_uv       vector whose low 8 bytes are U,V byte pairs (4 pairs).
//   ubvr        v4i32 multiplied lane-by-lane with the widened U,V values, so
//               it is expected to alternate the U->B and V->R coefficients.
//   ugvg        halfword pairs dotted with each (U,V) pair for the G term.
//   bb, bg, br  per-channel biases (v4i32); yg is the Y scale (v4i32).
//   out_b, out_g, out_r  v8i16 lvalues receiving 8 values each, already
//               clipped to 0..255.
// Per pixel the macro computes
//   y' = ((Y * 0x0101) * yg) >> 16
//   B  = clip((y' - U * ub + bb) >> 6)
//   G  = clip((y' - (U * ug + V * vg) + bg) >> 6)
//   R  = clip((y' - V * vr + br) >> 6)
// with every chroma product duplicated so that one (U,V) pair serves two
// neighbouring pixels.
#define YUVTORGB(in_y, in_uv, ubvr, ugvg, bb, bg, br, yg, out_b, out_g, out_r) \
  { \
    v8i16 vec0_m, vec1_m; \
    v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \
    v4i32 reg5_m, reg6_m, reg7_m; \
    v16i8 zero_m = {0}; \
    \
    /* Y bytes duplicated into both halves of a halfword; UV zero-extended. */ \
    vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y); \
    vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv); \
    /* Widen halfwords to 32-bit lanes. */ \
    reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m); \
    reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m); \
    reg2_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec1_m); \
    reg3_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec1_m); \
    reg0_m *= yg; \
    reg1_m *= yg; \
    reg2_m *= ubvr; \
    reg3_m *= ubvr; \
    reg0_m = __msa_srai_w(reg0_m, 16); \
    reg1_m = __msa_srai_w(reg1_m, 16); \
    /* One G contribution per (U,V) pair. */ \
    reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg); \
    /* Duplicate each chroma term for the two pixels that share it. */ \
    reg5_m = __msa_ilvev_w(reg2_m, reg2_m); \
    reg6_m = __msa_ilvev_w(reg3_m, reg3_m); \
    reg7_m = __msa_ilvr_w(reg4_m, reg4_m); \
    reg2_m = __msa_ilvod_w(reg2_m, reg2_m); \
    reg3_m = __msa_ilvod_w(reg3_m, reg3_m); \
    reg4_m = __msa_ilvl_w(reg4_m, reg4_m); \
    reg5_m = reg0_m - reg5_m; \
    reg6_m = reg1_m - reg6_m; \
    reg2_m = reg0_m - reg2_m; \
    reg3_m = reg1_m - reg3_m; \
    reg7_m = reg0_m - reg7_m; \
    reg4_m = reg1_m - reg4_m; \
    reg5_m += bb; \
    reg6_m += bb; \
    reg7_m += bg; \
    reg4_m += bg; \
    reg2_m += br; \
    reg3_m += br; \
    reg5_m = __msa_srai_w(reg5_m, 6); \
    reg6_m = __msa_srai_w(reg6_m, 6); \
    reg7_m = __msa_srai_w(reg7_m, 6); \
    reg4_m = __msa_srai_w(reg4_m, 6); \
    reg2_m = __msa_srai_w(reg2_m, 6); \
    reg3_m = __msa_srai_w(reg3_m, 6); \
    CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m); \
    /* Narrow the 32-bit results to halfwords, pixels 0..7 in order. */ \
    out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m); \
    out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m); \
    out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \
  }

// Pack and Store 8 ARGB values.
// Takes the even-indexed byte of every halfword of in0..in3, interleaves them
// as in0,in1,in2,in3 per pixel and stores the resulting two vectors (32
// bytes) through ST_UB2 starting at pdst_argb with a 16 byte stride.
#define STOREARGB(in0, in1, in2, in3, pdst_argb) \
  { \
    v8i16 vec0_m, vec1_m; \
    v16u8 dst0_m, dst1_m; \
    vec0_m = (v8i16)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
    vec1_m = (v8i16)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
    dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m); \
    dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m); \
    ST_UB2(dst0_m, dst1_m, pdst_argb, 16); \
  }

// Takes ARGB input and calculates Y.
//   argb0..argb3   four vectors of 4-byte pixels (16 pixels in total).
//   const0         unsigned byte coefficients dotted with bytes 0-1 of each
//                  pixel; const1 is dotted with bytes 2-3 and accumulated.
//   const2         halfword value added to every sum before the shift.
//   shift          right-shift amount; passed to __msa_srai_h, so it has to be
//                  a compile-time constant.
//   y_out          v16u8 lvalue receiving the low byte of each of the 16
//                  shifted sums.
#define ARGBTOY(argb0, argb1, argb2, argb3, const0, const1, const2, shift, \
                y_out) \
  { \
    v16u8 vec0_m, vec1_m, vec2_m, vec3_m; \
    v8u16 reg0_m, reg1_m; \
    \
    vec0_m = (v16u8)__msa_pckev_h((v8i16)argb1, (v8i16)argb0); \
    vec1_m = (v16u8)__msa_pckev_h((v8i16)argb3, (v8i16)argb2); \
    vec2_m = (v16u8)__msa_pckod_h((v8i16)argb1, (v8i16)argb0); \
    vec3_m = (v16u8)__msa_pckod_h((v8i16)argb3, (v8i16)argb2); \
    reg0_m = __msa_dotp_u_h(vec0_m, const0); \
    reg1_m = __msa_dotp_u_h(vec1_m, const0); \
    reg0_m = __msa_dpadd_u_h(reg0_m, vec2_m, const1); \
    reg1_m = __msa_dpadd_u_h(reg1_m, vec3_m, const1); \
    reg0_m += const2; \
    reg1_m += const2; \
    reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, shift); \
    reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, shift); \
    y_out = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
  }

// Loads current and next row of ARGB input and averages it to calculate U and V
//   s_ptr, t_ptr   pointers to the two rows; 128 bytes are read from each.
//                  Both arguments are expanded several times, so pass plain
//                  pointer variables without side effects.
//   argb0..argb3   v16u8 lvalues; together they receive 16 pixels, each the
//                  per-channel (sum of a 2x2 block of input pixels) >> 2.
// The row pointers are taken from the macro arguments; earlier revisions read
// the caller's variables named `s` and `t` regardless of what was passed.
#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3) \
  { \
    v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \
    v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
    v16u8 vec8_m, vec9_m; \
    v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \
    v8u16 reg8_m, reg9_m; \
    \
    /* First 64 bytes of each row. */ \
    src0_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 0); \
    src1_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 16); \
    src2_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 32); \
    src3_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 48); \
    src4_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 0); \
    src5_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 16); \
    src6_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 32); \
    src7_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 48); \
    /* Interleave the rows so each byte sits next to the one below it. */ \
    vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
    vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
    vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
    vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
    vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
    vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
    vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
    vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
    /* Vertical sums as 16-bit values. */ \
    reg0_m = __msa_hadd_u_h(vec0_m, vec0_m); \
    reg1_m = __msa_hadd_u_h(vec1_m, vec1_m); \
    reg2_m = __msa_hadd_u_h(vec2_m, vec2_m); \
    reg3_m = __msa_hadd_u_h(vec3_m, vec3_m); \
    reg4_m = __msa_hadd_u_h(vec4_m, vec4_m); \
    reg5_m = __msa_hadd_u_h(vec5_m, vec5_m); \
    reg6_m = __msa_hadd_u_h(vec6_m, vec6_m); \
    reg7_m = __msa_hadd_u_h(vec7_m, vec7_m); \
    /* Add horizontally adjacent pixels (even pixel + odd pixel). */ \
    reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
    reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
    reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
    reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
    reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
    reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
    reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
    reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
    /* Sum of four samples -> average. */ \
    reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
    reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
    reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
    reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
    argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
    argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
    /* Second 64 bytes of each row, same sequence. */ \
    src0_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 64); \
    src1_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 80); \
    src2_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 96); \
    src3_m = (v16u8)__msa_ld_b((v16i8*)(s_ptr), 112); \
    src4_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 64); \
    src5_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 80); \
    src6_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 96); \
    src7_m = (v16u8)__msa_ld_b((v16i8*)(t_ptr), 112); \
    vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
    vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
    vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
    vec5_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
    vec6_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
    vec7_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
    vec8_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
    vec9_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
    reg0_m = __msa_hadd_u_h(vec2_m, vec2_m); \
    reg1_m = __msa_hadd_u_h(vec3_m, vec3_m); \
    reg2_m = __msa_hadd_u_h(vec4_m, vec4_m); \
    reg3_m = __msa_hadd_u_h(vec5_m, vec5_m); \
    reg4_m = __msa_hadd_u_h(vec6_m, vec6_m); \
    reg5_m = __msa_hadd_u_h(vec7_m, vec7_m); \
    reg6_m = __msa_hadd_u_h(vec8_m, vec8_m); \
    reg7_m = __msa_hadd_u_h(vec9_m, vec9_m); \
    reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
    reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
    reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
    reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
    reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
    reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
    reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
    reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
    reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
    reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
    reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
    reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
    argb2 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
    argb3 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
  }

// Takes ARGB input and calculates U and V.
245b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \ 246b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard shf0, shf1, shf2, shf3, v_out, u_out) \ 247b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard { \ 248b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ 249b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0_m, reg1_m, reg2_m, reg3_m; \ 250b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard \ 251b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb1, (v16i8)argb0); \ 252b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb3, (v16i8)argb2); \ 253b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb1, (v16i8)argb0); \ 254b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb3, (v16i8)argb2); \ 255b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb1, (v16i8)argb0); \ 256b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb3, (v16i8)argb2); \ 257b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb1, (v16i8)argb0); \ 258b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb3, (v16i8)argb2); \ 259b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0_m = __msa_dotp_u_h(vec0_m, const1); \ 260b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1_m = __msa_dotp_u_h(vec1_m, const1); \ 261b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2_m = __msa_dotp_u_h(vec4_m, const1); \ 262b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3_m = __msa_dotp_u_h(vec5_m, const1); \ 
263b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0_m += const3; \ 264b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1_m += const3; \ 265b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2_m += const3; \ 266b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3_m += const3; \ 267b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0_m -= __msa_dotp_u_h(vec2_m, const0); \ 268b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1_m -= __msa_dotp_u_h(vec3_m, const0); \ 269b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2_m -= __msa_dotp_u_h(vec6_m, const2); \ 270b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3_m -= __msa_dotp_u_h(vec7_m, const2); \ 271b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v_out = (v16u8)__msa_pckod_b((v16i8)reg1_m, (v16i8)reg0_m); \ 272b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \ 273b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 274b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 275b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard// Load I444 pixel data 276b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ 277b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard { \ 278b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint64 y_m, u_m, v_m; \ 279b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v2i64 zero_m = {0}; \ 280b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard y_m = LD(psrc_y); \ 281b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard u_m = LD(psrc_u); \ 282b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v_m = LD(psrc_v); \ 283b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64)y_m); \ 284b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64)u_m); \ 
285b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64)v_m); \ 286b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 287b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 288b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid MirrorRow_MSA(const uint8* src, uint8* dst, int width) { 289b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 290b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3; 291b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, dst2, dst3; 292b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; 293b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src += width - 64; 294b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 295b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 64) { 296b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard LD_UB4(src, 16, src3, src2, src1, src0); 297b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); 298b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); 299b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst, 16); 300b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst += 64; 301b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src -= 64; 302b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 303b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 304b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 305b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) { 306b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 307b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3; 
308b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, dst2, dst3; 309b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; 310b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src += width * 4 - 64; 311b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 312b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 313b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard LD_UB4(src, 16, src3, src2, src1, src0); 314b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); 315b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); 316b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst, 16); 317b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst += 64; 318b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src -= 64; 319b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 320b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 321b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard

// I422 -> YUY2 row function (per its name). Each loop pass reads 32 bytes from
// src_y and 16 bytes each from src_u and src_v, and stores 64 bytes to
// dst_yuy2. The loop advances x by 32 while x < width, so the last pass is
// always a full one even when width is not a multiple of 32.
void I422ToYUY2Row_MSA(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_yuy2,
                       int width) {
  int x;
  v16u8 u_bytes, v_bytes, y_first, y_second, uv_first, uv_second;
  v16u8 packed0, packed1, packed2, packed3;

  for (x = 0; x < width; x += 32) {
    LD_UB2(src_y, 16, y_first, y_second);
    u_bytes = LD_UB(src_u);
    v_bytes = LD_UB(src_v);
    // Interleave U with V first, then interleave those UV pairs with Y.
    ILVRL_B2_UB(v_bytes, u_bytes, uv_first, uv_second);
    ILVRL_B2_UB(uv_first, y_first, packed0, packed1);
    ILVRL_B2_UB(uv_second, y_second, packed2, packed3);
    ST_UB4(packed0, packed1, packed2, packed3, dst_yuy2, 16);
    src_y += 32;
    src_u += 16;
    src_v += 16;
    dst_yuy2 += 64;
  }
}

// I422 -> UYVY row function (per its name). Same per-pass footprint as the
// YUY2 variant (32 Y bytes, 16 U bytes, 16 V bytes in; 64 bytes out); the only
// difference is that the operands of the second interleave stage are swapped.
void I422ToUYVYRow_MSA(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_uyvy,
                       int width) {
  int x;
  v16u8 u_bytes, v_bytes, y_first, y_second, uv_first, uv_second;
  v16u8 packed0, packed1, packed2, packed3;

  for (x = 0; x < width; x += 32) {
    LD_UB2(src_y, 16, y_first, y_second);
    u_bytes = LD_UB(src_u);
    v_bytes = LD_UB(src_v);
    // Interleave U with V first, then interleave Y with those UV pairs.
    ILVRL_B2_UB(v_bytes, u_bytes, uv_first, uv_second);
    ILVRL_B2_UB(y_first, uv_first, packed0, packed1);
    ILVRL_B2_UB(y_second, uv_second, packed2, packed3);
    ST_UB4(packed0, packed1, packed2, packed3, dst_uyvy, 16);
    src_y += 32;
    src_u += 16;
    src_v += 16;
    dst_uyvy += 64;
  }
}

// I422 -> ARGB row function (per its name). Each loop pass pulls 8 Y bytes and
// 4 bytes each of U and V through READYUV422, converts them with YUVTORGB
// using the coefficients read from yuvconstants, and hands the three results
// plus a vector filled with ALPHA_VAL to STOREARGB. rgb_buf advances 32 bytes
// per pass. The loop steps x by 8 while x < width.
void I422ToARGBRow_MSA(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  int x;
  v16u8 y_pix, u_pix, v_pix, uv_pix;
  v8i16 plane0, plane1, plane2;
  v4i32 k_ub, k_vr, k_ug, k_vg, k_bb, k_bg, k_br, k_yg;
  v4i32 k_ubvr, k_ugvg;
  v16u8 alpha_fill = (v16u8)__msa_ldi_b(ALPHA_VAL);

  // Broadcast the conversion constants, then pair them up in the layout the
  // YUVTORGB macro is given.
  YUVTORGB_SETUP(yuvconstants, k_ub, k_vr, k_ug, k_vg, k_bb, k_bg, k_br, k_yg);
  k_ubvr = __msa_ilvr_w(k_vr, k_ub);
  k_ugvg = (v4i32)__msa_ilvev_h((v8i16)k_vg, (v8i16)k_ug);

  for (x = 0; x < width; x += 8) {
    READYUV422(src_y, src_u, src_v, y_pix, u_pix, v_pix);
    uv_pix = (v16u8)__msa_ilvr_b((v16i8)v_pix, (v16i8)u_pix);
    YUVTORGB(y_pix, uv_pix, k_ubvr, k_ugvg, k_bb, k_bg, k_br, k_yg, plane0,
             plane1, plane2);
    STOREARGB(plane0, plane1, plane2, alpha_fill, rgb_buf);
    src_y += 8;
    src_u += 4;
    src_v += 4;
    rgb_buf += 32;
  }
}
400b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 401b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I422ToRGBARow_MSA(const uint8* src_y, 402b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 403b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 404b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 405b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 406b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 407b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 408b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2; 409b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 410b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 411b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 412b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 413b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 414b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 415b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 416b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 417b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 418b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 419b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 420b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READYUV422(src_y, src_u, src_v, src0, src1, src2); 421b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); 
422b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 423b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 424b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard STOREARGB(alpha, vec0, vec1, vec2, rgb_buf); 425b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 426b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 4; 427b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 4; 428b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 429b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 430b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 431b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 432b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I422AlphaToARGBRow_MSA(const uint8* src_y, 433b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 434b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 435b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_a, 436b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 437b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 438b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 439b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 440b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int64 data_a; 441b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3; 442b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 443b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 444b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 445b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 zero = {0}; 446b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard 447b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 448b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 449b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 450b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 451b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 452b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 453b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard data_a = LD(src_a); 454b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READYUV422(src_y, src_u, src_v, src0, src1, src2); 455b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); 456b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a); 457b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 458b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 459b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3); 460b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard STOREARGB(vec0, vec1, vec2, src3, rgb_buf); 461b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 462b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 4; 463b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 4; 464b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_a += 8; 465b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 466b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 467b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 468b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 469b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchardvoid I422ToRGB24Row_MSA(const uint8* src_y, 470b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 471b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 472b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 473b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 474b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int32 width) { 475b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 476b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int64 data_u, data_v; 477b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; 478b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2, vec3, vec4, vec5; 479b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 480b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 481b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 reg0, reg1, reg2, reg3; 482b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v2i64 zero = {0}; 483b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10}; 484b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10}; 485b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10, 486b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 11, 29, 12, 13, 30, 14, 15, 31}; 487b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 488b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 489b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 490b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = 
__msa_ilvr_w(vec_vr, vec_ub); 491b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 492b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 493b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 494b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0); 495b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard data_u = LD(src_u); 496b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard data_v = LD(src_v); 497b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_insert_d(zero, 0, data_u); 498b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_insert_d(zero, 0, data_v); 499b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); 500b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8); 501b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 8); 502b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 503b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 504b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src3, src4, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 505b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3, vec4, vec5); 506b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); 507b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3); 508b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2); 509b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11); 
510b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0); 511b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1); 512b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2); 513b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, rgb_buf, 16); 514b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst2, (rgb_buf + 32)); 515b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 16; 516b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 8; 517b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 8; 518b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 48; 519b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 520b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 521b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 522b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. 
523b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I422ToRGB565Row_MSA(const uint8* src_y, 524b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 525b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 526b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_rgb565, 527b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 528b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 529b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 530b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, dst0; 531b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 532b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 533b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 534b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 535b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 536b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 537b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 538b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 539b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 540b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 541b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READYUV422(src_y, src_u, src_v, src0, src1, src2); 542b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); 543b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 544b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 
vec0, vec2, vec1); 545b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_srai_h(vec0, 3); 546b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_srai_h(vec1, 3); 547b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = __msa_srai_h(vec2, 2); 548b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_slli_h(vec1, 11); 549b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = __msa_slli_h(vec2, 5); 550b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 |= vec1; 551b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)(vec2 | vec0); 552b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_rgb565); 553b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 554b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 4; 555b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 4; 556b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb565 += 16; 557b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 558b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 559b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 560b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G. 
561b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I422ToARGB4444Row_MSA(const uint8* src_y, 562b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 563b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 564b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb4444, 565b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 566b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 567b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 568b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, dst0; 569b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 570b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0, reg1, reg2; 571b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 572b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 573b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0xF000 = (v8u16)__msa_fill_h(0xF000); 574b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 575b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 576b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 577b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 578b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 579b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 580b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 581b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READYUV422(src_y, src_u, src_v, src0, src1, src2); 582b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ilvr_b((v16i8)src2, 
(v16i8)src1); 583b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 584b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 585b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srai_h(vec0, 4); 586b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_srai_h(vec1, 4); 587b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srai_h(vec2, 4); 588b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 4); 589b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 8); 590b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 |= const_0xF000; 591b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 |= reg2; 592b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)(reg1 | reg0); 593b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_argb4444); 594b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 595b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 4; 596b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 4; 597b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb4444 += 16; 598b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 599b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 600b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 601b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I422ToARGB1555Row_MSA(const uint8* src_y, 602b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 603b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 604b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb1555, 605b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 606b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int 
width) { 607b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 608b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, dst0; 609b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 610b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0, reg1, reg2; 611b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 612b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 613b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8000 = (v8u16)__msa_fill_h(0x8000); 614b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 615b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 616b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 617b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 618b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 619b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 620b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 621b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READYUV422(src_y, src_u, src_v, src0, src1, src2); 622b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); 623b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 624b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 625b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srai_h(vec0, 3); 626b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_srai_h(vec1, 3); 627b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srai_h(vec2, 3); 
628b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 5); 629b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 10); 630b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 |= const_0x8000; 631b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 |= reg2; 632b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)(reg1 | reg0); 633b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_argb1555); 634b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 635b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 4; 636b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 4; 637b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb1555 += 16; 638b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 639b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 640b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 641b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { 642b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 643b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0, dst1; 644b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 645b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 646b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard LD_UB4(src_yuy2, 16, src0, src1, src2, src3); 647b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); 648b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); 649b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_y, 16); 650b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_yuy2 += 64; 651b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 32; 
652b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 653b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 654b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 655b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid YUY2ToUVRow_MSA(const uint8* src_yuy2, 656b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_yuy2, 657b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 658b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 659b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 660b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_yuy2_next = src_yuy2 + src_stride_yuy2; 661b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 662b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 663b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1, dst0, dst1; 664b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 665b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 666b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard LD_UB4(src_yuy2, 16, src0, src1, src2, src3); 667b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7); 668b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); 669b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); 670b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); 671b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); 672b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_aver_u_b(src0, src2); 673b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_aver_u_b(src1, src3); 
674b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 675b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); 676b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_u); 677b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst1, dst_v); 678b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_yuy2 += 64; 679b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_yuy2_next += 64; 680b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 16; 681b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 16; 682b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 683b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 684b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard

// Splits the chroma bytes out of one row of packed YUY2.
// Each pass reads 64 source bytes and keeps only the odd-indexed ones; of
// those, the even positions are stored to dst_u and the odd positions to
// dst_v (16 bytes each).  Put differently, byte 1 of every 4-byte group goes
// to dst_u and byte 3 goes to dst_v.
// The loop advances in steps of 32 up to `width` and has no tail handling:
// the last pass always performs the full 64-byte read and both 16-byte
// stores, even when width is not a multiple of 32.
void YUY2ToUV422Row_MSA(const uint8* src_yuy2,
                        uint8* dst_u,
                        uint8* dst_v,
                        int width) {
  int i;
  v16u8 in0, in1, in2, in3;
  v16u8 uv_lo, uv_hi;
  v16u8 out_u, out_v;

  for (i = 0; i < width; i += 32) {
    LD_UB4(src_yuy2, 16, in0, in1, in2, in3);
    // Odd bytes of the input: the two chroma streams, still interleaved.
    uv_lo = (v16u8)__msa_pckod_b((v16i8)in1, (v16i8)in0);
    uv_hi = (v16u8)__msa_pckod_b((v16i8)in3, (v16i8)in2);
    // De-interleave: even positions -> dst_u, odd positions -> dst_v.
    out_u = (v16u8)__msa_pckev_b((v16i8)uv_hi, (v16i8)uv_lo);
    out_v = (v16u8)__msa_pckod_b((v16i8)uv_hi, (v16i8)uv_lo);
    ST_UB(out_u, dst_u);
    ST_UB(out_v, dst_v);
    src_yuy2 += 64;
    dst_u += 16;
    dst_v += 16;
  }
}

// Extracts the odd-indexed bytes of one row of packed UYVY into dst_y.
// Each pass reads 64 source bytes and stores 32 bytes.  The loop advances in
// steps of 32 up to `width` with no tail handling, so the last pass is always
// a full one.
void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) {
  int i;
  v16u8 in0, in1, in2, in3;
  v16u8 y_lo, y_hi;

  for (i = 0; i < width; i += 32) {
    LD_UB4(src_uyvy, 16, in0, in1, in2, in3);
    y_lo = (v16u8)__msa_pckod_b((v16i8)in1, (v16i8)in0);
    y_hi = (v16u8)__msa_pckod_b((v16i8)in3, (v16i8)in2);
    ST_UB2(y_lo, y_hi, dst_y, 16);
    src_uyvy += 64;
    dst_y += 32;
  }
}

// Produces vertically averaged chroma from two rows of packed UYVY.
// The second row is read from src_uyvy + src_stride_uyvy.  Each pass reads 64
// bytes from both rows, keeps the even-indexed bytes of each, combines the
// two rows byte-by-byte with __msa_aver_u_b (unsigned rounding average), and
// then stores the even positions of the result to dst_u and the odd positions
// to dst_v (16 bytes each).
// The loop advances in steps of 32 up to `width` with no tail handling.
void UYVYToUVRow_MSA(const uint8* src_uyvy,
                     int src_stride_uyvy,
                     uint8* dst_u,
                     uint8* dst_v,
                     int width) {
  const uint8* src_uyvy_next = src_uyvy + src_stride_uyvy;
  int i;
  v16u8 top0, top1, top2, top3;
  v16u8 bot0, bot1, bot2, bot3;
  v16u8 top_uv_lo, top_uv_hi, bot_uv_lo, bot_uv_hi;
  v16u8 avg_lo, avg_hi;
  v16u8 out_u, out_v;

  for (i = 0; i < width; i += 32) {
    LD_UB4(src_uyvy, 16, top0, top1, top2, top3);
    LD_UB4(src_uyvy_next, 16, bot0, bot1, bot2, bot3);
    // Even bytes of each row: the interleaved chroma.
    top_uv_lo = (v16u8)__msa_pckev_b((v16i8)top1, (v16i8)top0);
    top_uv_hi = (v16u8)__msa_pckev_b((v16i8)top3, (v16i8)top2);
    bot_uv_lo = (v16u8)__msa_pckev_b((v16i8)bot1, (v16i8)bot0);
    bot_uv_hi = (v16u8)__msa_pckev_b((v16i8)bot3, (v16i8)bot2);
    avg_lo = __msa_aver_u_b(top_uv_lo, bot_uv_lo);
    avg_hi = __msa_aver_u_b(top_uv_hi, bot_uv_hi);
    out_u = (v16u8)__msa_pckev_b((v16i8)avg_hi, (v16i8)avg_lo);
    out_v = (v16u8)__msa_pckod_b((v16i8)avg_hi, (v16i8)avg_lo);
    ST_UB(out_u, dst_u);
    ST_UB(out_v, dst_v);
    src_uyvy += 64;
    src_uyvy_next += 64;
    dst_u += 16;
    dst_v += 16;
  }
}

// Splits the chroma bytes out of one row of packed UYVY.
// Each pass reads 64 source bytes and keeps only the even-indexed ones; of
// those, the even positions are stored to dst_u and the odd positions to
// dst_v (16 bytes each), i.e. byte 0 of every 4-byte group goes to dst_u and
// byte 2 goes to dst_v.
// The loop advances in steps of 32 up to `width` with no tail handling.
void UYVYToUV422Row_MSA(const uint8* src_uyvy,
                        uint8* dst_u,
                        uint8* dst_v,
                        int width) {
  int i;
  v16u8 in0, in1, in2, in3;
  v16u8 uv_lo, uv_hi;
  v16u8 out_u, out_v;

  for (i = 0; i < width; i += 32) {
    LD_UB4(src_uyvy, 16, in0, in1, in2, in3);
    uv_lo = (v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0);
    uv_hi = (v16u8)__msa_pckev_b((v16i8)in3, (v16i8)in2);
    out_u = (v16u8)__msa_pckev_b((v16i8)uv_hi, (v16i8)uv_lo);
    out_v = (v16u8)__msa_pckod_b((v16i8)uv_hi, (v16i8)uv_lo);
    ST_UB(out_u, dst_u);
    ST_UB(out_v, dst_v);
    src_uyvy += 64;
    dst_u += 16;
    dst_v += 16;
  }
}

// Computes one output byte per 4-byte source pixel:
//   dst = (0x19 * byte0 + 0x81 * byte1 + 0x42 * byte2 + 0x1080) >> 8
// Byte 3 of each pixel is ignored.  Each pass consumes 16 pixels (64 bytes)
// and stores 16 bytes; the loop advances in steps of 16 up to `width` with no
// tail handling.
// NOTE(review): the b/g/r names below assume byte0/byte1/byte2 are blue,
// green and red (libyuv's in-memory ARGB order) - confirm against the format
// documentation.
void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
  int i;
  v16u8 px0, px1, px2, px3;
  v16u8 br_lo, br_hi, ga_lo, ga_hi;
  v16u8 out_y;
  v8u16 b_lo, b_hi, g_lo, g_hi, r_lo, r_hi;
  v8u16 y_lo, y_hi;
  v16i8 zero = {0};
  v8u16 coeff_b = (v8u16)__msa_ldi_h(0x19);
  v8u16 coeff_g = (v8u16)__msa_ldi_h(0x81);
  v8u16 coeff_r = (v8u16)__msa_ldi_h(0x42);
  v8u16 bias = (v8u16)__msa_fill_h(0x1080);

  for (i = 0; i < width; i += 16) {
    px0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
    px1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
    px2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
    px3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);

    // Even source bytes are byte0/byte2 pairs, odd ones byte1/byte3 pairs.
    br_lo = (v16u8)__msa_pckev_b((v16i8)px1, (v16i8)px0);
    br_hi = (v16u8)__msa_pckev_b((v16i8)px3, (v16i8)px2);
    ga_lo = (v16u8)__msa_pckod_b((v16i8)px1, (v16i8)px0);
    ga_hi = (v16u8)__msa_pckod_b((v16i8)px3, (v16i8)px2);

    // Interleaving with zero widens the selected bytes to 16-bit lanes.
    b_lo = (v8u16)__msa_ilvev_b(zero, (v16i8)br_lo);
    b_hi = (v8u16)__msa_ilvev_b(zero, (v16i8)br_hi);
    g_lo = (v8u16)__msa_ilvev_b(zero, (v16i8)ga_lo);
    g_hi = (v8u16)__msa_ilvev_b(zero, (v16i8)ga_hi);
    r_lo = (v8u16)__msa_ilvod_b(zero, (v16i8)br_lo);
    r_hi = (v8u16)__msa_ilvod_b(zero, (v16i8)br_hi);

    y_lo = b_lo * coeff_b + g_lo * coeff_g + r_lo * coeff_r + bias;
    y_hi = b_hi * coeff_b + g_hi * coeff_g + r_hi * coeff_r + bias;

    // The shift is arithmetic, but only the low byte of each lane survives
    // the pack below, so the sign fill in the upper byte does not matter.
    y_lo = (v8u16)__msa_srai_h((v8i16)y_lo, 8);
    y_hi = (v8u16)__msa_srai_h((v8i16)y_hi, 8);
    out_y = (v16u8)__msa_pckev_b((v16i8)y_hi, (v16i8)y_lo);
    ST_UB(out_y, dst_y);
    src_argb0 += 64;
    dst_y += 16;
  }
}

// Computes 2x2-subsampled chroma from two rows of 4-byte pixels.
// The second row is read from src_argb0 + src_stride_argb.  For bytes 0, 1
// and 2 of the pixels separately, each horizontally adjacent pair is summed
// in both rows and the four-sample total is shifted right by 2, giving
// c0, c1, c2.  The outputs are then
//   dst_u = (0x70 * c0 - 0x4A * c1 - 0x26 * c2 + 0x8080) >> 8
//   dst_v = (0x70 * c2 - 0x5E * c1 - 0x12 * c0 + 0x8080) >> 8
// evaluated modulo 2^16 in 16-bit lanes, with only the low byte stored.
// Byte 3 of each pixel is ignored.  Each pass consumes 32 pixels (128 bytes)
// per row and stores 16 bytes to each of dst_u and dst_v; the loop advances
// in steps of 32 up to `width` with no tail handling.
// NOTE(review): the b/g/r names below assume byte0/byte1/byte2 are blue,
// green and red (libyuv's in-memory ARGB order) - confirm against the format
// documentation.
void ARGBToUVRow_MSA(const uint8* src_argb0,
                     int src_stride_argb,
                     uint8* dst_u,
                     uint8* dst_v,
                     int width) {
  int i;
  const uint8* src_argb0_next = src_argb0 + src_stride_argb;
  v16u8 px0, px1, px2, px3, px4, px5, px6, px7;
  v16u8 br0, br1, br2, br3;
  v16u8 ga0, ga1, ga2, ga3;
  v16u8 b_lo, b_hi, g_lo, g_hi, r_lo, r_hi;
  v8u16 sum_b0, sum_b1, sum_g0, sum_g1, sum_r0, sum_r1;
  v8u16 u_lo, u_hi, v_lo, v_hi;
  v16u8 out_u, out_v;
  v8u16 k112 = (v8u16)__msa_ldi_h(0x70);
  v8u16 k74 = (v8u16)__msa_ldi_h(0x4A);
  v8u16 k38 = (v8u16)__msa_ldi_h(0x26);
  v8u16 k94 = (v8u16)__msa_ldi_h(0x5E);
  v8u16 k18 = (v8u16)__msa_ldi_h(0x12);
  v8u16 bias = (v8u16)__msa_fill_h(0x8080);

  for (i = 0; i < width; i += 32) {
    // ---- first row: de-interleave channels and sum adjacent pairs ----
    px0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
    px1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
    px2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
    px3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
    px4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
    px5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
    px6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
    px7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
    br0 = (v16u8)__msa_pckev_b((v16i8)px1, (v16i8)px0);
    br1 = (v16u8)__msa_pckev_b((v16i8)px3, (v16i8)px2);
    br2 = (v16u8)__msa_pckev_b((v16i8)px5, (v16i8)px4);
    br3 = (v16u8)__msa_pckev_b((v16i8)px7, (v16i8)px6);
    ga0 = (v16u8)__msa_pckod_b((v16i8)px1, (v16i8)px0);
    ga1 = (v16u8)__msa_pckod_b((v16i8)px3, (v16i8)px2);
    ga2 = (v16u8)__msa_pckod_b((v16i8)px5, (v16i8)px4);
    ga3 = (v16u8)__msa_pckod_b((v16i8)px7, (v16i8)px6);
    b_lo = (v16u8)__msa_pckev_b((v16i8)br1, (v16i8)br0);
    b_hi = (v16u8)__msa_pckev_b((v16i8)br3, (v16i8)br2);
    g_lo = (v16u8)__msa_pckev_b((v16i8)ga1, (v16i8)ga0);
    g_hi = (v16u8)__msa_pckev_b((v16i8)ga3, (v16i8)ga2);
    r_lo = (v16u8)__msa_pckod_b((v16i8)br1, (v16i8)br0);
    r_hi = (v16u8)__msa_pckod_b((v16i8)br3, (v16i8)br2);
    // hadd of a vector with itself adds each odd byte to the even byte
    // before it, yielding eight 16-bit pair sums.
    sum_b0 = __msa_hadd_u_h(b_lo, b_lo);
    sum_b1 = __msa_hadd_u_h(b_hi, b_hi);
    sum_g0 = __msa_hadd_u_h(g_lo, g_lo);
    sum_g1 = __msa_hadd_u_h(g_hi, g_hi);
    sum_r0 = __msa_hadd_u_h(r_lo, r_lo);
    sum_r1 = __msa_hadd_u_h(r_hi, r_hi);

    // ---- second row: same de-interleave, accumulated onto the sums ----
    px0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
    px1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
    px2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
    px3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
    px4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
    px5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
    px6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
    px7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
    br0 = (v16u8)__msa_pckev_b((v16i8)px1, (v16i8)px0);
    br1 = (v16u8)__msa_pckev_b((v16i8)px3, (v16i8)px2);
    br2 = (v16u8)__msa_pckev_b((v16i8)px5, (v16i8)px4);
    br3 = (v16u8)__msa_pckev_b((v16i8)px7, (v16i8)px6);
    ga0 = (v16u8)__msa_pckod_b((v16i8)px1, (v16i8)px0);
    ga1 = (v16u8)__msa_pckod_b((v16i8)px3, (v16i8)px2);
    ga2 = (v16u8)__msa_pckod_b((v16i8)px5, (v16i8)px4);
    ga3 = (v16u8)__msa_pckod_b((v16i8)px7, (v16i8)px6);
    b_lo = (v16u8)__msa_pckev_b((v16i8)br1, (v16i8)br0);
    b_hi = (v16u8)__msa_pckev_b((v16i8)br3, (v16i8)br2);
    g_lo = (v16u8)__msa_pckev_b((v16i8)ga1, (v16i8)ga0);
    g_hi = (v16u8)__msa_pckev_b((v16i8)ga3, (v16i8)ga2);
    r_lo = (v16u8)__msa_pckod_b((v16i8)br1, (v16i8)br0);
    r_hi = (v16u8)__msa_pckod_b((v16i8)br3, (v16i8)br2);
    sum_b0 += __msa_hadd_u_h(b_lo, b_lo);
    sum_b1 += __msa_hadd_u_h(b_hi, b_hi);
    sum_g0 += __msa_hadd_u_h(g_lo, g_lo);
    sum_g1 += __msa_hadd_u_h(g_hi, g_hi);
    sum_r0 += __msa_hadd_u_h(r_lo, r_lo);
    sum_r1 += __msa_hadd_u_h(r_hi, r_hi);

    // Four-sample totals -> per-block values.
    sum_b0 = (v8u16)__msa_srai_h((v8i16)sum_b0, 2);
    sum_b1 = (v8u16)__msa_srai_h((v8i16)sum_b1, 2);
    sum_g0 = (v8u16)__msa_srai_h((v8i16)sum_g0, 2);
    sum_g1 = (v8u16)__msa_srai_h((v8i16)sum_g1, 2);
    sum_r0 = (v8u16)__msa_srai_h((v8i16)sum_r0, 2);
    sum_r1 = (v8u16)__msa_srai_h((v8i16)sum_r1, 2);

    // Unsigned 16-bit lanes wrap modulo 2^16, so the grouping of the terms
    // does not affect the result.
    u_lo = sum_b0 * k112 + bias - (sum_g0 * k74 + sum_r0 * k38);
    u_hi = sum_b1 * k112 + bias - (sum_g1 * k74 + sum_r1 * k38);
    v_lo = sum_r0 * k112 + bias - (sum_g0 * k94 + sum_b0 * k18);
    v_hi = sum_r1 * k112 + bias - (sum_g1 * k94 + sum_b1 * k18);

    // Arithmetic shift; only the low byte of each lane is packed below.
    u_lo = (v8u16)__msa_srai_h((v8i16)u_lo, 8);
    u_hi = (v8u16)__msa_srai_h((v8i16)u_hi, 8);
    v_lo = (v8u16)__msa_srai_h((v8i16)v_lo, 8);
    v_hi = (v8u16)__msa_srai_h((v8i16)v_hi, 8);
    out_u = (v16u8)__msa_pckev_b((v16i8)u_hi, (v16i8)u_lo);
    out_v = (v16u8)__msa_pckev_b((v16i8)v_hi, (v16i8)v_lo);
    ST_UB(out_u, dst_u);
    ST_UB(out_v, dst_v);
    src_argb0 += 128;
    src_argb0_next += 128;
    dst_u += 16;
    dst_v += 16;
  }
}
934b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 935b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { 936b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 937b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0, dst1, dst2; 938b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20}; 939b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {5, 6, 8, 9, 10, 12, 13, 14, 940b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 16, 17, 18, 20, 21, 22, 24, 25}; 941b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {10, 12, 13, 14, 16, 17, 18, 20, 942b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 21, 22, 24, 25, 26, 28, 29, 30}; 943b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 944b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 945b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 946b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 947b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); 948b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); 949b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); 950b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); 951b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); 952b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_rgb, 16); 953b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst2, 
(dst_rgb + 32)); 954b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 64; 955b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb += 48; 956b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 957b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 958b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 959b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { 960b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 961b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0, dst1, dst2; 962b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22}; 963b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {5, 4, 10, 9, 8, 14, 13, 12, 964b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 17, 16, 22, 21, 20, 26, 25}; 965b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {8, 14, 13, 12, 18, 17, 16, 22, 966b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 21, 20, 26, 25, 24, 30, 29, 28}; 967b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 968b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 969b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 970b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 971b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); 972b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); 973b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); 974b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); 
975b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); 976b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_rgb, 16); 977b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst2, (dst_rgb + 32)); 978b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 64; 979b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb += 48; 980b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 981b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 982b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 983b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { 984b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 985b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0; 986b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 987b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 988b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 989b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 990b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 991b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 992b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); 993b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3); 994b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5); 995b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3); 996b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3); 
997b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5); 998b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); 999b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); 1000b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1); 1001b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); 1002b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2); 1003b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2); 1004b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_binsli_b(vec0, vec1, 2); 1005b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_binsli_b(vec2, vec3, 4); 1006b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = __msa_binsli_b(vec4, vec5, 2); 1007b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = __msa_binsli_b(vec6, vec7, 4); 1008b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); 1009b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4); 1010b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0); 1011b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_rgb); 1012b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 1013b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb += 16; 1014b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1015b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1016b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1017b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToARGB1555Row_MSA(const uint8* 
src_argb, uint8* dst_rgb, int width) { 1018b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1019b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0; 1020b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; 1021b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 1022b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1023b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1024b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 1025b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 1026b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); 1027b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2); 1028b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3); 1029b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); 1030b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); 1031b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1); 1032b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3); 1033b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2); 1034b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3); 1035b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); 1036b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1); 1037b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec8 = 
(v16u8)__msa_srai_b((v16i8)src1, 1); 1038b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2); 1039b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2); 1040b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3); 1041b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3); 1042b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_binsli_b(vec0, vec1, 2); 1043b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = __msa_binsli_b(vec5, vec6, 2); 1044b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_binsli_b(vec2, vec3, 5); 1045b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = __msa_binsli_b(vec7, vec8, 5); 1046b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_binsli_b(vec1, vec4, 0); 1047b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = __msa_binsli_b(vec6, vec9, 0); 1048b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); 1049b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5); 1050b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); 1051b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_rgb); 1052b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 1053b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb += 16; 1054b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1055b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1056b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1057b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { 1058b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard int x; 1059b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1; 1060b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1; 1061b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 1062b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 1063b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1064b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1065b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 1066b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 1067b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4); 1068b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4); 1069b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1); 1070b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1); 1071b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_binsli_b(vec0, src0, 3); 1072b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_binsli_b(vec1, src1, 3); 1073b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1074b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_rgb); 1075b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 1076b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb += 16; 1077b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1078b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1079b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1080b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToUV444Row_MSA(const uint8* src_argb, 1081b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* 
dst_u, 1082b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 1083b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int32 width) { 1084b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int32 x; 1085b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1; 1086b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 1087b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec8, vec9, vec10, vec11; 1088b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_112 = (v8u16)__msa_ldi_h(112); 1089b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_74 = (v8u16)__msa_ldi_h(74); 1090b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_38 = (v8u16)__msa_ldi_h(38); 1091b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_94 = (v8u16)__msa_ldi_h(94); 1092b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_18 = (v8u16)__msa_ldi_h(18); 1093b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_32896 = (v8u16)__msa_fill_h(32896); 1094b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 1095b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1096b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = width; x > 0; x -= 16) { 1097b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 1098b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 1099b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); 1100b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); 1101b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); 1102b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = 
(v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); 1103b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); 1104b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); 1105b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); 1106b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); 1107b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0); 1108b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); 1109b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); 1110b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); 1111b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); 1112b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); 1113b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2); 1114b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec10 = vec0 * const_18; 1115b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec11 = vec1 * const_18; 1116b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec8 = vec2 * const_94; 1117b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec9 = vec3 * const_94; 1118b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = vec4 * const_112; 1119b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = vec5 * const_112; 1120b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 *= const_112; 1121b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 *= const_112; 1122b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 *= const_74; 
1123b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 *= const_74; 1124b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 *= const_38; 1125b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 *= const_38; 1126b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec8 += vec10; 1127b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec9 += vec11; 1128b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 += const_32896; 1129b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 += const_32896; 1130b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 += const_32896; 1131b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 += const_32896; 1132b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 += vec4; 1133b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 += vec5; 1134b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 -= vec2; 1135b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 -= vec3; 1136b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 -= vec8; 1137b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 -= vec9; 1138b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); 1139b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); 1140b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8); 1141b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8); 1142b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1143b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); 1144b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_u); 1145b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst1, dst_v); 1146b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 
src_argb += 64; 1147b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 16; 1148b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 16; 1149b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1150b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1151b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1152b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBMultiplyRow_MSA(const uint8* src_argb0, 1153b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_argb1, 1154b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1155b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1156b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1157b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0; 1158b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3; 1159b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4u32 reg0, reg1, reg2, reg3; 1160b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 zero = {0}; 1161b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1162b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 4) { 1163b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 1164b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); 1165b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); 1166b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); 1167b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); 1168b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1); 1169b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4u32)__msa_ilvr_h(zero, 
(v8i16)vec0); 1170b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); 1171b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); 1172b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); 1173b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); 1174b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); 1175b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); 1176b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); 1177b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16); 1178b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16); 1179b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16); 1180b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16); 1181b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 1182b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 1183b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1184b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_argb); 1185b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 16; 1186b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb1 += 16; 1187b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 16; 1188b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1189b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1190b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard 1191b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBAddRow_MSA(const uint8* src_argb0, 1192b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_argb1, 1193b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1194b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1195b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1196b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0, dst1; 1197b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1198b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1199b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 1200b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 1201b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); 1202b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); 1203b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = __msa_adds_u_b(src0, src2); 1204b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = __msa_adds_u_b(src1, src3); 1205b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_argb, 16); 1206b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 32; 1207b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb1 += 32; 1208b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 32; 1209b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1210b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1211b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1212b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBSubtractRow_MSA(const uint8* src_argb0, 1213b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_argb1, 
1214b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1215b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1216b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1217b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0, dst1; 1218b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1219b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1220b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 1221b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 1222b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); 1223b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); 1224b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = __msa_subs_u_b(src0, src2); 1225b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = __msa_subs_u_b(src1, src3); 1226b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_argb, 16); 1227b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 32; 1228b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb1 += 32; 1229b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 32; 1230b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1231b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1232b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1233b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { 1234b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1235b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0, dst1; 1236b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, 
vec9; 1237b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4u32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; 1238b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 zero = {0}; 1239b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; 1240b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1241b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1242b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 1243b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 1244b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); 1245b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); 1246b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1); 1247b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_ilvl_b((v16i8)src1, (v16i8)src1); 1248b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_fill_h(vec0[3]); 1249b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_fill_h(vec0[7]); 1250b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_fill_h(vec1[3]); 1251b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_fill_h(vec1[7]); 1252b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); 1253b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); 1254b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_fill_h(vec2[3]); 1255b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_fill_h(vec2[7]); 1256b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard vec8 = (v8u16)__msa_fill_h(vec3[3]); 1257b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec9 = (v8u16)__msa_fill_h(vec3[7]); 1258b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); 1259b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); 1260b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec4); 1261b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec4); 1262b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec5); 1263b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec5); 1264b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec6); 1265b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec6); 1266b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg6 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec7); 1267b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg7 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec7); 1268b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); 1269b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); 1270b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); 1271b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); 1272b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); 1273b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); 1274b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg6 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); 
1275b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg7 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); 1276b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); 1277b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); 1278b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); 1279b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); 1280b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = (v4u32)__msa_srai_w((v4i32)reg4, 24); 1281b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = (v4u32)__msa_srai_w((v4i32)reg5, 24); 1282b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg6 = (v4u32)__msa_srai_w((v4i32)reg6, 24); 1283b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg7 = (v4u32)__msa_srai_w((v4i32)reg7, 24); 1284b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 1285b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 1286b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); 1287b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_pckev_h((v8i16)reg7, (v8i16)reg6); 1288b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1289b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); 1290b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = __msa_bmnz_v(dst0, src0, mask); 1291b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = __msa_bmnz_v(dst1, src1, mask); 1292b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_argb, 16); 1293b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 
1294b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 32; 1295b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1296b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1297b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1298b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToRGB565DitherRow_MSA(const uint8* src_argb, 1299b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_rgb, 1300b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint32 dither4, 1301b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1302b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1303b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0, vec0, vec1; 1304b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec_d0; 1305b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 reg0, reg1, reg2; 1306b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 1307b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 max = __msa_ldi_h(0xFF); 1308b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1309b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_d0 = (v8i16)__msa_fill_w(dither4); 1310b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0); 1311b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1312b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1313b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); 1314b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); 1315b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); 1316b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); 1317b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = 
(v8i16)__msa_ilvev_b(zero, (v16i8)vec0); 1318b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec1); 1319b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8i16)__msa_ilvod_b(zero, (v16i8)vec0); 1320b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += vec_d0; 1321b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += vec_d0; 1322b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += vec_d0; 1323b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_maxi_s_h((v8i16)reg0, 0); 1324b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_maxi_s_h((v8i16)reg1, 0); 1325b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_maxi_s_h((v8i16)reg2, 0); 1326b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_min_s_h((v8i16)max, (v8i16)reg0); 1327b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_min_s_h((v8i16)max, (v8i16)reg1); 1328b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_min_s_h((v8i16)max, (v8i16)reg2); 1329b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_h(reg0, 3); 1330b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_h(reg2, 3); 1331b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_srai_h(reg1, 2); 1332b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_slli_h(reg2, 11); 1333b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_slli_h(reg1, 5); 1334b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 |= reg1; 1335b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)(reg0 | reg2); 1336b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_rgb); 1337b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 1338b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_rgb += 16; 1339b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 
1340b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1341b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1342b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBShuffleRow_MSA(const uint8* src_argb, 1343b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1344b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* shuffler, 1345b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1346b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1347b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0, dst1; 1348b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 vec0; 1349b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; 1350b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int32 val = LW((int32*)shuffler); 1351b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1352b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16i8)__msa_fill_w(val); 1353b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard shuffler_vec += vec0; 1354b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1355b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1356b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); 1357b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); 1358b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0); 1359b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1); 1360b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_argb, 16); 1361b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 1362b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 32; 
1363b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1364b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1365b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1366b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBShadeRow_MSA(const uint8* src_argb, 1367b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1368b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width, 1369b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint32 value) { 1370b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1371b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, dst0; 1372b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1; 1373b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4u32 reg0, reg1, reg2, reg3, rgba_scale; 1374b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 zero = {0}; 1375b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1376b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgba_scale[0] = value; 1377b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgba_scale = (v4u32)__msa_ilvr_b((v16i8)rgba_scale, (v16i8)rgba_scale); 1378b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgba_scale = (v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale); 1379b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1380b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 4) { 1381b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); 1382b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); 1383b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); 1384b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); 1385b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); 
1386b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); 1387b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); 1388b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 *= rgba_scale; 1389b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 *= rgba_scale; 1390b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 *= rgba_scale; 1391b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 *= rgba_scale; 1392b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); 1393b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); 1394b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); 1395b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); 1396b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 1397b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 1398b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1399b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_argb); 1400b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 16; 1401b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 16; 1402b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1403b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1404b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1405b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { 1406b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1407b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, vec0, vec1, dst0, dst1; 
1408b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0; 1409b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26); 1410b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); 1411b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1412b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1413b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); 1414b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); 1415b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); 1416b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); 1417b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_dotp_u_h(vec0, const_0x4B0F); 1418b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26); 1419b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7); 1420b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0); 1421b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0); 1422b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0); 1423b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0); 1424b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_argb, 16); 1425b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb += 32; 1426b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 32; 1427b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1428b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard} 1429b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1430b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBSepiaRow_MSA(uint8* dst_argb, int width) { 1431b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1432b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5; 1433b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0, reg1, reg2; 1434b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411); 1435b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23); 1436b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816); 1437b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D); 1438b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218); 1439b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32); 1440b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0xFF = (v8u16)__msa_ldi_h(0xFF); 1441b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1442b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 1443b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0); 1444b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16); 1445b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); 1446b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); 1447b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1); 1448b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_dotp_u_h(vec0, 
const_0x4411); 1449b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_dotp_u_h(vec0, const_0x5816); 1450b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_dotp_u_h(vec0, const_0x6218); 1451b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_dpadd_u_h(reg0, vec1, const_0x23); 1452b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_dpadd_u_h(reg1, vec1, const_0x2D); 1453b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_dpadd_u_h(reg2, vec1, const_0x32); 1454b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 7); 1455b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 7); 1456b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 7); 1457b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_min_u_h((v8u16)reg1, const_0xFF); 1458b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_min_u_h((v8u16)reg2, const_0xFF); 1459b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0); 1460b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1); 1461b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2); 1462b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); 1463b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); 1464b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4); 1465b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4); 1466b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_argb, 16); 
1467b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 32; 1468b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1469b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1470b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1471b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, 1472b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1473b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1474b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1475b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1; 1476b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3; 1477b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, dst2, dst3; 1478b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1479b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1480b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); 1481b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 16); 1482b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_andi_b(src0, 0x0F); 1483b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_andi_b(src1, 0x0F); 1484b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_andi_b(src0, 0xF0); 1485b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_andi_b(src1, 0xF0); 1486b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 |= (v8u16)__msa_slli_b((v16i8)vec0, 4); 1487b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 |= (v8u16)__msa_slli_b((v16i8)vec1, 4); 1488b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 |= (v8u16)__msa_srli_b((v16i8)vec2, 4); 1489b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 |= 
(v8u16)__msa_srli_b((v16i8)vec3, 4); 1490b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); 1491b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); 1492b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); 1493b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); 1494b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 1495b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb4444 += 32; 1496b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 1497b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1498b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1499b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1500b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGB1555ToARGBRow_MSA(const uint8* src_argb1555, 1501b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 1502b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1503b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1504b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 src0, src1; 1505b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5; 1506b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6; 1507b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, dst2, dst3; 1508b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); 1509b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1510b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1511b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_ld_h((v8u16*)src_argb1555, 0); 
1512b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_ld_h((v8u16*)src_argb1555, 16); 1513b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = src0 & const_0x1F; 1514b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = src1 & const_0x1F; 1515b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); 1516b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); 1517b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = src0 & const_0x1F; 1518b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = src1 & const_0x1F; 1519b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); 1520b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); 1521b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = src0 & const_0x1F; 1522b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = src1 & const_0x1F; 1523b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); 1524b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); 1525b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1526b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); 1527b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); 1528b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); 1529b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = (v16u8)__msa_slli_b((v16i8)reg0, 3); 1530b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = (v16u8)__msa_slli_b((v16i8)reg1, 3); 1531b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg6 = (v16u8)__msa_slli_b((v16i8)reg2, 
3); 1532b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 |= (v16u8)__msa_srai_b((v16i8)reg0, 2); 1533b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 |= (v16u8)__msa_srai_b((v16i8)reg1, 2); 1534b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg6 |= (v16u8)__msa_srai_b((v16i8)reg2, 2); 1535b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = -reg3; 1536b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_ilvr_b((v16i8)reg6, (v16i8)reg4); 1537b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v16u8)__msa_ilvl_b((v16i8)reg6, (v16i8)reg4); 1538b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg5); 1539b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg5); 1540b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)reg2, (v16i8)reg0); 1541b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)reg2, (v16i8)reg0); 1542b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg1); 1543b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg1); 1544b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 1545b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb1555 += 32; 1546b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 1547b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1548b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1549b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1550b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width) { 1551b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1552b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; 1553b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0, reg1, reg2, reg3, reg4, reg5; 1554b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3; 1555b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 1556b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); 1557b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0); 1558b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); 1559b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1560b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1561b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_ld_h((v8u16*)src_rgb565, 0); 1562b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_ld_h((v8u16*)src_rgb565, 16); 1563b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = src0 & const_0x1F; 1564b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = src0 & const_0x7E0; 1565b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = src0 & const_0xF800; 1566b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = src1 & const_0x1F; 1567b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = src1 & const_0x7E0; 1568b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = src1 & const_0xF800; 1569b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); 1570b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3); 1571b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8); 1572b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3); 
1573b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3); 1574b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8); 1575b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2); 1576b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9); 1577b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13); 1578b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2); 1579b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9); 1580b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13); 1581b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = (v16u8)__msa_ilvev_b((v16i8)reg2, (v16i8)reg0); 1582b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg1); 1583b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res2 = (v16u8)__msa_ilvev_b((v16i8)reg5, (v16i8)reg3); 1584b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res3 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg4); 1585b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); 1586b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); 1587b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res2); 1588b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res2); 1589b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 1590b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_rgb565 += 32; 1591b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 
1592b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1593b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1594b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1595b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width) { 1596b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1597b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2; 1598b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1, vec2; 1599b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, dst2, dst3; 1600b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 1601b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; 1602b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1603b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1604b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 0); 1605b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 16); 1606b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 32); 1607b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); 1608b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); 1609b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); 1610b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)src0); 1611b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec0); 1612b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard dst2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec1); 1613b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec2); 1614b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 1615b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_rgb24 += 48; 1616b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 1617b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1618b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1619b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1620b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width) { 1621b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1622b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2; 1623b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1, vec2; 1624b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, dst2, dst3; 1625b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 1626b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19}; 1627b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1628b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1629b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_raw, 0); 1630b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_raw, 16); 1631b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_raw, 32); 1632b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); 1633b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = 
(v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); 1634b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); 1635b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)src0); 1636b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec0); 1637b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec1); 1638b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec2); 1639b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 1640b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_raw += 48; 1641b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 1642b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1643b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1644b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1645b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width) { 1646b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1647b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; 1648b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0, reg1, reg2, reg3, reg4, reg5; 1649b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 1650b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19); 1651b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81); 1652b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42); 1653b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); 
1654b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); 1655b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1656b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1657b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_ld_b((v8i16*)src_argb1555, 0); 1658b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_ld_b((v8i16*)src_argb1555, 16); 1659b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = src0 & const_0x1F; 1660b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = src1 & const_0x1F; 1661b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); 1662b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); 1663b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = src0 & const_0x1F; 1664b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = src1 & const_0x1F; 1665b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); 1666b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); 1667b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = src0 & const_0x1F; 1668b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = src1 & const_0x1F; 1669b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); 1670b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_slli_h((v8i16)vec1, 3); 1671b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 |= (v8u16)__msa_srai_h((v8i16)vec0, 2); 1672b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 |= (v8u16)__msa_srai_h((v8i16)vec1, 2); 1673b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_slli_h((v8i16)vec2, 3); 1674b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = 
(v8u16)__msa_slli_h((v8i16)vec3, 3); 1675b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 |= (v8u16)__msa_srai_h((v8i16)vec2, 2); 1676b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 |= (v8u16)__msa_srai_h((v8i16)vec3, 2); 1677b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = (v8u16)__msa_slli_h((v8i16)vec4, 3); 1678b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = (v8u16)__msa_slli_h((v8i16)vec5, 3); 1679b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 |= (v8u16)__msa_srai_h((v8i16)vec4, 2); 1680b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 |= (v8u16)__msa_srai_h((v8i16)vec5, 2); 1681b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 *= const_0x19; 1682b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 *= const_0x19; 1683b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 *= const_0x81; 1684b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 *= const_0x81; 1685b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 *= const_0x42; 1686b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 *= const_0x42; 1687b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += reg2; 1688b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += reg3; 1689b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += reg4; 1690b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += reg5; 1691b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += const_0x1080; 1692b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += const_0x1080; 1693b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); 1694b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8); 1695b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); 1696b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 
1697b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb1555 += 32; 1698b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 1699b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1700b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1701b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1702b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width) { 1703b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1704b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 1705b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 reg0, reg1, reg2, reg3, reg4, reg5; 1706b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4u32 res0, res1, res2, res3; 1707b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 1708b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4u32 const_0x810019 = (v4u32)__msa_fill_w(0x810019); 1709b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4u32 const_0x010042 = (v4u32)__msa_fill_w(0x010042); 1710b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 const_0x1080 = __msa_fill_h(0x1080); 1711b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); 1712b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0); 1713b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); 1714b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1715b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1716b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_ld_b((v8i16*)src_rgb565, 0); 1717b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_ld_b((v8i16*)src_rgb565, 16); 1718b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = 
src0 & const_0x1F; 1719b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = src0 & const_0x7E0; 1720b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = src0 & const_0xF800; 1721b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = src1 & const_0x1F; 1722b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = src1 & const_0x7E0; 1723b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = src1 & const_0xF800; 1724b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); 1725b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3); 1726b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8); 1727b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3); 1728b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3); 1729b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8); 1730b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2); 1731b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9); 1732b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13); 1733b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2); 1734b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9); 1735b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13); 1736b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_h((v8i16)reg1, (v8i16)reg0); 1737b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_h((v8i16)reg1, (v8i16)reg0); 1738b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = 
(v8u16)__msa_ilvr_h((v8i16)reg4, (v8i16)reg3); 1739b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_ilvl_h((v8i16)reg4, (v8i16)reg3); 1740b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg2); 1741b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg2); 1742b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg5); 1743b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg5); 1744b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = __msa_dotp_u_w(vec0, (v8u16)const_0x810019); 1745b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = __msa_dotp_u_w(vec1, (v8u16)const_0x810019); 1746b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res2 = __msa_dotp_u_w(vec2, (v8u16)const_0x810019); 1747b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res3 = __msa_dotp_u_w(vec3, (v8u16)const_0x810019); 1748b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = __msa_dpadd_u_w(res0, vec4, (v8u16)const_0x010042); 1749b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = __msa_dpadd_u_w(res1, vec5, (v8u16)const_0x010042); 1750b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res2 = __msa_dpadd_u_w(res2, vec6, (v8u16)const_0x010042); 1751b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res3 = __msa_dpadd_u_w(res3, vec7, (v8u16)const_0x010042); 1752b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = (v4u32)__msa_srai_w((v4i32)res0, 8); 1753b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = (v4u32)__msa_srai_w((v4i32)res1, 8); 1754b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res2 = (v4u32)__msa_srai_w((v4i32)res2, 8); 1755b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res3 = (v4u32)__msa_srai_w((v4i32)res3, 8); 1756b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)res1, (v8i16)res0); 1757b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)res3, (v8i16)res2); 1758b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1759b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 1760b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_rgb565 += 32; 1761b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 1762b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1763b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1764b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1765b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGB24ToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { 1766b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1767b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; 1768b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3; 1769b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8119 = (v8u16)__msa_fill_h(0x8119); 1770b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x42 = (v8u16)__msa_fill_h(0x42); 1771b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); 1772b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; 1773b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, 1774b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 19, 20, 21, 21, 22, 23, 24}; 1775b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; 1776b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 
13, 14, 15, 16}; 1777b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 1778b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1779b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1780b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 1781b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 1782b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); 1783b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); 1784b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); 1785b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); 1786b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); 1787b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 1788b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 1789b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); 1790b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); 1791b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8119); 1792b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8119); 1793b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x42); 1794b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x42); 
1795b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 += const_0x1080; 1796b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 += const_0x1080; 1797b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); 1798b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); 1799b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1800b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 1801b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 48; 1802b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 1803b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1804b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1805b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1806b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RAWToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { 1807b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1808b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; 1809b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3; 1810b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8142 = (v8u16)__msa_fill_h(0x8142); 1811b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x19 = (v8u16)__msa_fill_h(0x19); 1812b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); 1813b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; 1814b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, 1815b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 19, 20, 21, 21, 22, 23, 24}; 1816b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 
mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; 1817b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16}; 1818b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 1819b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1820b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1821b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 1822b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 1823b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); 1824b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); 1825b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); 1826b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); 1827b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); 1828b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 1829b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 1830b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); 1831b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); 1832b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8142); 1833b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8142); 1834b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_dpadd_u_h(vec0, 
(v16u8)vec2, (v16u8)const_0x19); 1835b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x19); 1836b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 += const_0x1080; 1837b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 += const_0x1080; 1838b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); 1839b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); 1840b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1841b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 1842b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 48; 1843b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 1844b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1845b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1846b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1847b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGB1555ToUVRow_MSA(const uint8* src_argb1555, 1848b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_argb1555, 1849b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 1850b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 1851b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1852b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1853b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint16* s = (const uint16*)src_argb1555; 1854b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint16* t = (const uint16*)(src_argb1555 + src_stride_argb1555); 1855b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int64_t res0, res1; 1856b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; 1857b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; 1858b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 1859b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70); 1860b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); 1861b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); 1862b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); 1863b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); 1864b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 1865b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); 1866b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1867b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1868b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); 1869b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); 1870b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); 1871b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); 1872b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = src0 & const_0x1F; 1873b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = src1 & const_0x1F; 1874b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 += src2 & const_0x1F; 1875b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 += src3 & const_0x1F; 1876b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1877b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); 
1878b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); 1879b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); 1880b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); 1881b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = src0 & const_0x1F; 1882b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = src1 & const_0x1F; 1883b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 += src2 & const_0x1F; 1884b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 += src3 & const_0x1F; 1885b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); 1886b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); 1887b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); 1888b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); 1889b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); 1890b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = src0 & const_0x1F; 1891b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = src1 & const_0x1F; 1892b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 += src2 & const_0x1F; 1893b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 += src3 & const_0x1F; 1894b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); 1895b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); 1896b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); 1897b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); 
1898b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_slli_h((v8i16)vec0, 1); 1899b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 |= (v8u16)__msa_srai_h((v8i16)vec0, 6); 1900b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_slli_h((v8i16)vec2, 1); 1901b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 |= (v8u16)__msa_srai_h((v8i16)vec2, 6); 1902b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_slli_h((v8i16)vec4, 1); 1903b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 |= (v8u16)__msa_srai_h((v8i16)vec4, 6); 1904b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = vec6 * const_0x70; 1905b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = vec0 * const_0x4A; 1906b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = vec2 * const_0x70; 1907b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = vec0 * const_0x5E; 1908b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += const_0x8080; 1909b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += vec2 * const_0x26; 1910b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += const_0x8080; 1911b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 += vec6 * const_0x12; 1912b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 -= reg1; 1913b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 -= reg3; 1914b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); 1915b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8); 1916b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); 1917b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = __msa_copy_u_d((v2i64)dst0, 0); 1918b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = __msa_copy_u_d((v2i64)dst0, 1); 
1919b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res0, dst_u); 1920b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res1, dst_v); 1921b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 16; 1922b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 16; 1923b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 8; 1924b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 8; 1925b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 1926b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 1927b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1928b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGB565ToUVRow_MSA(const uint8* src_rgb565, 1929b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb565, 1930b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 1931b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 1932b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 1933b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 1934b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint16* s = (const uint16*)src_rgb565; 1935b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint16* t = (const uint16*)(src_rgb565 + src_stride_rgb565); 1936b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int64_t res0, res1; 1937b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; 1938b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5; 1939b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 1940b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70); 1941b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); 1942b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); 
1943b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); 1944b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); 1945b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_32896 = (v8u16)__msa_fill_h(0x8080); 1946b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); 1947b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x3F = (v8u16)__msa_fill_h(0x3F); 1948b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 1949b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 1950b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); 1951b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); 1952b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); 1953b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); 1954b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = src0 & const_0x1F; 1955b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = src1 & const_0x1F; 1956b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 += src2 & const_0x1F; 1957b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 += src3 & const_0x1F; 1958b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 1959b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); 1960b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); 1961b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); 1962b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); 1963b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard vec2 = src0 & const_0x3F; 1964b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = src1 & const_0x3F; 1965b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 += src2 & const_0x3F; 1966b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 += src3 & const_0x3F; 1967b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); 1968b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v8u16)__msa_srai_h((v8i16)src0, 6); 1969b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v8u16)__msa_srai_h((v8i16)src1, 6); 1970b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v8u16)__msa_srai_h((v8i16)src2, 6); 1971b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v8u16)__msa_srai_h((v8i16)src3, 6); 1972b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = src0 & const_0x1F; 1973b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = src1 & const_0x1F; 1974b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 += src2 & const_0x1F; 1975b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 += src3 & const_0x1F; 1976b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); 1977b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); 1978b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); 1979b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); 1980b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_slli_h((v8i16)vec0, 1); 1981b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 |= (v8u16)__msa_srai_h((v8i16)vec0, 6); 1982b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_slli_h((v8i16)vec2, 1); 1983b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 |= 
(v8u16)__msa_srai_h((v8i16)vec2, 6); 1984b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = vec3 * const_0x70; 1985b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = vec1 * const_0x4A; 1986b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = vec4 * const_0x70; 1987b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = vec1 * const_0x5E; 1988b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += const_32896; 1989b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += vec4 * const_0x26; 1990b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += const_32896; 1991b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 += vec3 * const_0x12; 1992b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 -= reg1; 1993b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 -= reg3; 1994b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); 1995b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8); 1996b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); 1997b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = __msa_copy_u_d((v2i64)dst0, 0); 1998b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = __msa_copy_u_d((v2i64)dst0, 1); 1999b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res0, dst_u); 2000b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res1, dst_v); 2001b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 16; 2002b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 16; 2003b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 8; 2004b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 8; 2005b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2006b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2007b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 
2008b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGB24ToUVRow_MSA(const uint8* src_rgb0, 2009b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb, 2010b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 2011b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 2012b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2013b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2014b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* s = src_rgb0; 2015b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* t = src_rgb0 + src_stride_rgb; 2016b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int64 res0, res1; 2017b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 2018b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 inp0, inp1, inp2, inp3, inp4, inp5; 2019b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 2020b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 reg0, reg1, reg2, reg3; 2021b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 2022b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70); 2023b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A); 2024b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26); 2025b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E); 2026b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12); 2027b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 2028b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; 
2029b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 2030b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2031b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2032b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp0 = (v16u8)__msa_ld_b((v16i8*)s, 0); 2033b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp1 = (v16u8)__msa_ld_b((v16i8*)s, 16); 2034b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp2 = (v16u8)__msa_ld_b((v16i8*)s, 32); 2035b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp3 = (v16u8)__msa_ld_b((v16i8*)t, 0); 2036b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp4 = (v16u8)__msa_ld_b((v16i8*)t, 16); 2037b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp5 = (v16u8)__msa_ld_b((v16i8*)t, 32); 2038b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); 2039b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); 2040b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); 2041b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); 2042b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); 2043b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); 2044b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); 2045b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); 2046b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); 2047b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, 
(v16i8)src3); 2048b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); 2049b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); 2050b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); 2051b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); 2052b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); 2053b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); 2054b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); 2055b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); 2056b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); 2057b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); 2058b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); 2059b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); 2060b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); 2061b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); 2062b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); 2063b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); 2064b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); 
2065b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); 2066b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); 2067b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); 2068b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); 2069b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); 2070b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); 2071b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); 2072b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); 2073b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); 2074b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); 2075b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); 2076b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_h((v8i16)reg0, 2); 2077b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_srai_h((v8i16)reg1, 2); 2078b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_h((v8i16)reg2, 2); 2079b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = __msa_srai_h((v8i16)reg3, 2); 2080b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_pckev_h(reg1, reg0); 2081b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_pckev_h(reg3, reg2); 2082b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_pckod_h(reg1, reg0); 2083b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard vec7 = (v8u16)__msa_pckod_h(reg3, reg2); 2084b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); 2085b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); 2086b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); 2087b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = vec0 * const_0x70; 2088b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = vec1 * const_0x4A; 2089b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = vec2 * const_0x26; 2090b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 *= const_0x70; 2091b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 *= const_0x5E; 2092b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 *= const_0x12; 2093b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); 2094b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); 2095b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); 2096b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); 2097b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += reg1; 2098b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += reg3; 2099b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_h(reg0, 8); 2100b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_h(reg2, 8); 2101b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); 2102b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = __msa_copy_u_d((v2i64)dst0, 0); 2103b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = __msa_copy_u_d((v2i64)dst0, 1); 
2104b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res0, dst_u); 2105b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res1, dst_v); 2106b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 48; 2107b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 48; 2108b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 8; 2109b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 8; 2110b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2111b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2112b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2113b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RAWToUVRow_MSA(const uint8* src_rgb0, 2114b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb, 2115b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 2116b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 2117b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2118b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2119b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* s = src_rgb0; 2120b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* t = src_rgb0 + src_stride_rgb; 2121b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int64 res0, res1; 2122b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 inp0, inp1, inp2, inp3, inp4, inp5; 2123b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 2124b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 2125b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 reg0, reg1, reg2, reg3; 2126b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0; 2127b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70); 2128b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 
const_0x4A = (v8u16)__msa_fill_h(0x4A); 2129b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26); 2130b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E); 2131b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12); 2132b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 2133b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; 2134b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 zero = {0}; 2135b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2136b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2137b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp0 = (v16u8)__msa_ld_b((v16i8*)s, 0); 2138b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp1 = (v16u8)__msa_ld_b((v16i8*)s, 16); 2139b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp2 = (v16u8)__msa_ld_b((v16i8*)s, 32); 2140b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp3 = (v16u8)__msa_ld_b((v16i8*)t, 0); 2141b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp4 = (v16u8)__msa_ld_b((v16i8*)t, 16); 2142b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard inp5 = (v16u8)__msa_ld_b((v16i8*)t, 32); 2143b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); 2144b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); 2145b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); 2146b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); 2147b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 
4); 2148b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); 2149b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); 2150b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); 2151b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); 2152b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); 2153b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); 2154b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); 2155b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); 2156b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); 2157b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); 2158b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); 2159b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); 2160b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); 2161b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); 2162b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); 2163b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); 2164b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); 
2165b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); 2166b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); 2167b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); 2168b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); 2169b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); 2170b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); 2171b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); 2172b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); 2173b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); 2174b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); 2175b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); 2176b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); 2177b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); 2178b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); 2179b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); 2180b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); 2181b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_h(reg0, 2); 2182b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = 
__msa_srai_h(reg1, 2); 2183b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_h(reg2, 2); 2184b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = __msa_srai_h(reg3, 2); 2185b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 2186b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 2187b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec6 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); 2188b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec7 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); 2189b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); 2190b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); 2191b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); 2192b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = vec0 * const_0x70; 2193b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec4 = vec1 * const_0x4A; 2194b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec5 = vec2 * const_0x26; 2195b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 *= const_0x70; 2196b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 *= const_0x5E; 2197b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 *= const_0x12; 2198b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); 2199b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); 2200b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); 2201b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); 2202b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += 
reg1; 2203b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 += reg3; 2204b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_h(reg0, 8); 2205b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_h(reg2, 8); 2206b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); 2207b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = __msa_copy_u_d((v2i64)dst0, 0); 2208b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = __msa_copy_u_d((v2i64)dst0, 1); 2209b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res0, dst_u); 2210b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard SD(res1, dst_v); 2211b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 48; 2212b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 48; 2213b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 8; 2214b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 8; 2215b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2216b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2217b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2218b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid NV12ToARGBRow_MSA(const uint8* src_y, 2219b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_uv, 2220b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 2221b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 2222b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2223b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2224b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint64 val0, val1; 2225b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, res0, res1, dst0, dst1; 2226b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 2227b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 
vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 2228b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 2229b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 zero = {0}; 2230b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2231b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2232b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 2233b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 2234b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 2235b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 2236b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2237b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 2238b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard val0 = LD(src_y); 2239b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard val1 = LD(src_uv); 2240b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); 2241b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); 2242b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 2243b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 2244b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); 2245b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); 2246b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); 2247b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = 
(v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); 2248b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, rgb_buf, 16); 2249b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 2250b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_uv += 8; 2251b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 2252b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2253b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2254b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2255b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid NV12ToRGB565Row_MSA(const uint8* src_y, 2256b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_uv, 2257b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 2258b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 2259b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2260b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2261b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint64 val0, val1; 2262b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, dst0; 2263b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 2264b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 2265b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 2266b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 zero = {0}; 2267b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2268b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 2269b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 2270b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 2271b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 
vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 2272b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2273b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 2274b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard val0 = LD(src_y); 2275b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard val1 = LD(src_uv); 2276b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); 2277b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); 2278b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 2279b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 2280b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = vec0 >> 3; 2281b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (vec1 >> 2) << 5; 2282b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (vec2 >> 3) << 11; 2283b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)(vec0 | vec1 | vec2); 2284b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, rgb_buf); 2285b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 2286b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_uv += 8; 2287b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 16; 2288b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2289b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2290b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2291b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid NV21ToARGBRow_MSA(const uint8* src_y, 2292b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_vu, 2293b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 2294b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 
2295b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2296b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2297b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint64 val0, val1; 2298b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, res0, res1, dst0, dst1; 2299b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 2300b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 2301b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 2302b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2303b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 zero = {0}; 2304b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; 2305b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2306b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 2307b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 2308b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 2309b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 2310b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2311b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 2312b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard val0 = LD(src_y); 2313b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard val1 = LD(src_vu); 2314b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); 2315b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); 2316b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard src1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); 2317b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 2318b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 2319b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); 2320b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); 2321b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); 2322b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); 2323b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, rgb_buf, 16); 2324b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 2325b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_vu += 8; 2326b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 2327b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2328b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2329b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2330b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid SobelRow_MSA(const uint8* src_sobelx, 2331b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_sobely, 2332b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 2333b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2334b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2335b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3; 2336b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask0 = {0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16}; 2337b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 const_0x4 = __msa_ldi_b(0x4); 
2338b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask1 = mask0 + const_0x4; 2339b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask2 = mask1 + const_0x4; 2340b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 mask3 = mask2 + const_0x4; 2341b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2342b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2343b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2344b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0); 2345b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0); 2346b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_adds_u_b(src0, src1); 2347b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0); 2348b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0); 2349b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_vshf_b(mask2, (v16i8)alpha, (v16i8)vec0); 2350b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_vshf_b(mask3, (v16i8)alpha, (v16i8)vec0); 2351b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 2352b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_sobelx += 16; 2353b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_sobely += 16; 2354b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 2355b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2356b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2357b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2358b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid SobelToPlaneRow_MSA(const uint8* src_sobelx, 2359b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard const uint8* src_sobely, 2360b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_y, 2361b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2362b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2363b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0, dst1; 2364b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2365b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 2366b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0); 2367b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 16); 2368b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0); 2369b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 16); 2370b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = __msa_adds_u_b(src0, src2); 2371b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = __msa_adds_u_b(src1, src3); 2372b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, dst_y, 16); 2373b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_sobelx += 32; 2374b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_sobely += 32; 2375b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 32; 2376b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2377b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2378b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2379b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid SobelXYRow_MSA(const uint8* src_sobelx, 2380b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_sobely, 2381b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_argb, 2382b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 
2383b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2384b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, vec0, vec1, vec2; 2385b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 reg0, reg1, dst0, dst1, dst2, dst3; 2386b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2387b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2388b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2389b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0); 2390b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0); 2391b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_adds_u_b(src0, src1); 2392b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1); 2393b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1); 2394b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)vec0); 2395b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)vec0); 2396b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)vec1); 2397b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)vec1); 2398b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)vec2); 2399b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)vec2); 2400b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 2401b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_sobelx += 16; 2402b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_sobely += 16; 
2403b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 2404b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2405b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2406b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2407b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToYJRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { 2408b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2409b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0; 2410b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); 2411b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26); 2412b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40); 2413b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2414b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2415b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 2416b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 2417b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); 2418b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); 2419b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7, 2420b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0); 2421b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 2422b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 64; 2423b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 2424b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2425b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 
2426b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2427b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid BGRAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { 2428b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2429b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0; 2430b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200); 2431b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x1981 = (v16u8)__msa_fill_h(0x1981); 2432b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); 2433b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2434b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2435b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 2436b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 2437b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); 2438b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); 2439b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8, 2440b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0); 2441b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 2442b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 64; 2443b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 2444b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2445b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2446b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2447b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ABGRToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { 
2448b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2449b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0; 2450b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142); 2451b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x19 = (v16u8)__msa_fill_h(0x19); 2452b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); 2453b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2454b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2455b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 2456b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 2457b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); 2458b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); 2459b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8, 2460b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0); 2461b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 2462b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 64; 2463b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 2464b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2465b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2466b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2467b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGBAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { 2468b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2469b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, dst0; 
2470b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900); 2471b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x4281 = (v16u8)__msa_fill_h(0x4281); 2472b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); 2473b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2474b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2475b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); 2476b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); 2477b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); 2478b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); 2479b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8, 2480b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0); 2481b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_y); 2482b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_argb0 += 64; 2483b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_y += 16; 2484b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2485b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2486b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2487b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ARGBToUVJRow_MSA(const uint8* src_rgb0, 2488b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb, 2489b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 2490b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 2491b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2492b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 
2493b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* s = src_rgb0; 2494b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* t = src_rgb0 + src_stride_rgb; 2495b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 2496b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 vec0, vec1, vec2, vec3; 2497b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1; 2498b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; 2499b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 2500b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 19, 22, 23, 26, 27, 30, 31}; 2501b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; 2502b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30}; 2503b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x7F = (v16u8)__msa_fill_h(0x7F); 2504b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x6B14 = (v16u8)__msa_fill_h(0x6B14); 2505b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x2B54 = (v16u8)__msa_fill_h(0x2B54); 2506b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 2507b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2508b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 2509b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); 2510b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); 2511b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); 
2512b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); 2513b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); 2514b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); 2515b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); 2516b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); 2517b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = __msa_aver_u_b(src0, src4); 2518b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = __msa_aver_u_b(src1, src5); 2519b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = __msa_aver_u_b(src2, src6); 2520b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = __msa_aver_u_b(src3, src7); 2521b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); 2522b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); 2523b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); 2524b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); 2525b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_aver_u_b(src4, src6); 2526b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_aver_u_b(src5, src7); 2527b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)s, 64); 2528b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_ld_b((v16i8*)s, 80); 2529b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_ld_b((v16i8*)s, 96); 2530b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = (v16u8)__msa_ld_b((v16i8*)s, 112); 2531b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = 
(v16u8)__msa_ld_b((v16i8*)t, 64); 2532b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_ld_b((v16i8*)t, 80); 2533b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_ld_b((v16i8*)t, 96); 2534b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_ld_b((v16i8*)t, 112); 2535b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = __msa_aver_u_b(src0, src4); 2536b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = __msa_aver_u_b(src1, src5); 2537b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = __msa_aver_u_b(src2, src6); 2538b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src3 = __msa_aver_u_b(src3, src7); 2539b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); 2540b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); 2541b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); 2542b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); 2543b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = __msa_aver_u_b(src4, src6); 2544b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = __msa_aver_u_b(src5, src7); 2545b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOUV(vec0, vec1, vec2, vec3, const_0x6B14, const_0x7F, const_0x2B54, 2546b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0, 2547b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1); 2548b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_v); 2549b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst1, dst_u); 2550b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 128; 2551b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 128; 
2552b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 16; 2553b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 16; 2554b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2555b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2556b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2557b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid BGRAToUVRow_MSA(const uint8* src_rgb0, 2558b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb, 2559b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 2560b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 2561b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2562b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2563b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* s = src_rgb0; 2564b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* t = src_rgb0 + src_stride_rgb; 2565b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, vec0, vec1, vec2, vec3; 2566b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; 2567b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 2568b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 19, 22, 23, 26, 27, 30, 31}; 2569b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; 2570b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29}; 2571b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); 2572b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); 2573b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x264A = 
(v16u8)__msa_fill_h(0x264A); 2574b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 2575b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2576b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 2577b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READ_ARGB(s, t, vec0, vec1, vec2, vec3); 2578b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A, 2579b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0, 2580b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1); 2581b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_v); 2582b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst1, dst_u); 2583b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 128; 2584b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 128; 2585b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 16; 2586b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 16; 2587b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2588b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2589b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2590b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid ABGRToUVRow_MSA(const uint8* src_rgb0, 2591b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb, 2592b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 2593b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 2594b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2595b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2596b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* s = src_rgb0; 2597b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* t = src_rgb0 + src_stride_rgb; 
2598b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, src3; 2599b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1; 2600b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; 2601b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 2602b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 19, 22, 23, 26, 27, 30, 31}; 2603b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; 2604b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30}; 2605b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x4A26 = (v16u8)__msa_fill_h(0x4A26); 2606b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x0070 = (v16u8)__msa_fill_h(0x0070); 2607b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); 2608b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 2609b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2610b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 2611b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READ_ARGB(s, t, src0, src1, src2, src3); 2612b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOUV(src0, src1, src2, src3, const_0x4A26, const_0x0070, const_0x125E, 2613b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0, 2614b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1); 2615b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_u); 2616b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst1, dst_v); 2617b83bb38f0a92bedeb52baa31e515220927ef53bbFrank 
Barchard s += 128; 2618b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 128; 2619b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 16; 2620b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 16; 2621b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2622b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2623b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2624b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid RGBAToUVRow_MSA(const uint8* src_rgb0, 2625b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride_rgb, 2626b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_u, 2627b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_v, 2628b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2629b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2630b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* s = src_rgb0; 2631b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* t = src_rgb0 + src_stride_rgb; 2632b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 dst0, dst1, vec0, vec1, vec2, vec3; 2633b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; 2634b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 2635b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 18, 19, 22, 23, 26, 27, 30, 31}; 2636b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; 2637b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29}; 2638b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x125E = (v16u8)__msa_fill_h(0x264A); 2639b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); 
2640b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 const_0x264A = (v16u8)__msa_fill_h(0x125E); 2641b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); 2642b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2643b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 32) { 2644b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READ_ARGB(s, t, vec0, vec1, vec2, vec3); 2645b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A, 2646b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0, 2647b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1); 2648b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst0, dst_u); 2649b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB(dst1, dst_v); 2650b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard s += 128; 2651b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard t += 128; 2652b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_u += 16; 2653b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_v += 16; 2654b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2655b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2656b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2657b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I444ToARGBRow_MSA(const uint8* src_y, 2658b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_u, 2659b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const uint8* src_v, 2660b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 2661b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 2662b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2663b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 
2664b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2, dst0, dst1; 2665b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8u16 vec0, vec1, vec2; 2666b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; 2667b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 2668b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2669b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 zero = {0}; 2670b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2671b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 2672b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 2673b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2674b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 2675b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard READI444(src_y, src_u, src_v, src0, src1, src2); 2676b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); 2677b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); 2678b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); 2679b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 *= vec_yg; 2680b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 *= vec_yg; 2681b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_w(reg0, 16); 2682b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_srai_w(reg1, 16); 2683b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = reg0 + vec_br; 2684b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = reg1 + vec_br; 
2685b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = reg0 + vec_bg; 2686b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = reg1 + vec_bg; 2687b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 += vec_bb; 2688b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 += vec_bb; 2689b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); 2690b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src2); 2691b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); 2692b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); 2693b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); 2694b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); 2695b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 -= reg6 * vec_ub; 2696b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 -= reg7 * vec_ub; 2697b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 -= reg6 * vec_ug; 2698b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 -= reg7 * vec_ug; 2699b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 -= reg8 * vec_vr; 2700b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 -= reg9 * vec_vr; 2701b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 -= reg8 * vec_vg; 2702b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 -= reg9 * vec_vg; 2703b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_w(reg0, 6); 2704b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_srai_w(reg1, 6); 2705b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_w(reg2, 6); 2706b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = __msa_srai_w(reg3, 
6); 2707b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg4 = __msa_srai_w(reg4, 6); 2708b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg5 = __msa_srai_w(reg5, 6); 2709b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard CLIP_0TO255(reg0, reg1, reg2, reg3, reg4, reg5); 2710b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 2711b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 2712b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); 2713b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8u16)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); 2714b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2); 2715b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0); 2716b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0); 2717b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB2(dst0, dst1, rgb_buf, 16); 2718b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 8; 2719b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_u += 8; 2720b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_v += 8; 2721b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 2722b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2723b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2724b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2725b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid I400ToARGBRow_MSA(const uint8* src_y, uint8* rgb_buf, int width) { 2726b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2727b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3; 
2728b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1; 2729b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 reg0, reg1, reg2, reg3; 2730b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_yg = __msa_fill_w(0x4A35); 2731b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec_ygb = __msa_fill_h(0xFB78); 2732b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2733b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 max = __msa_ldi_h(0xFF); 2734b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 zero = {0}; 2735b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2736b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2737b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_y, 0); 2738b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); 2739b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); 2740b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = (v4i32)__msa_ilvr_h(zero, vec0); 2741b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = (v4i32)__msa_ilvl_h(zero, vec0); 2742b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = (v4i32)__msa_ilvr_h(zero, vec1); 2743b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = (v4i32)__msa_ilvl_h(zero, vec1); 2744b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 *= vec_yg; 2745b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 *= vec_yg; 2746b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 *= vec_yg; 2747b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 *= vec_yg; 2748b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg0 = __msa_srai_w(reg0, 16); 2749b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg1 = __msa_srai_w(reg1, 16); 
2750b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg2 = __msa_srai_w(reg2, 16); 2751b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard reg3 = __msa_srai_w(reg3, 16); 2752b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v8i16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); 2753b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v8i16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); 2754b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 += vec_ygb; 2755b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 += vec_ygb; 2756b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_srai_h(vec0, 6); 2757b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_srai_h(vec1, 6); 2758b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_maxi_s_h(vec0, 0); 2759b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_maxi_s_h(vec1, 0); 2760b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = __msa_min_s_h(max, vec0); 2761b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = __msa_min_s_h(max, vec1); 2762b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 2763b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res1 = (v16u8)__msa_ilvr_b((v16i8)res0, (v16i8)res0); 2764b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res2 = (v16u8)__msa_ilvl_b((v16i8)res0, (v16i8)res0); 2765b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res3 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)res0); 2766b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard res4 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)res0); 2767b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res1); 2768b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1); 2769b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, 
(v16i8)res2); 2770b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2); 2771b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, rgb_buf, 16); 2772b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 16; 2773b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 64; 2774b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2775b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2776b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2777b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width) { 2778b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2779b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3; 2780b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2781b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2782b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 16) { 2783b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_y, 0); 2784b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0); 2785b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0); 2786b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0); 2787b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec3 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)src0); 2788b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); 2789b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); 2790b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst2 = 
(v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); 2791b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); 2792b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); 2793b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_y += 16; 2794b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard dst_argb += 64; 2795b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2796b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2797b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2798b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid YUY2ToARGBRow_MSA(const uint8* src_yuy2, 2799b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 2800b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 2801b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2802b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2803b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2; 2804b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 2805b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 2806b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 2807b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); 2808b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2809b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 2810b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 2811b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 2812b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 
2813b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2814b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 2815b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_yuy2, 0); 2816b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); 2817b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); 2818b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 2819b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 2820b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard STOREARGB(vec0, vec1, vec2, alpha, rgb_buf); 2821b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_yuy2 += 16; 2822b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 2823b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2824b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2825b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2826b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchardvoid UYVYToARGBRow_MSA(const uint8* src_uyvy, 2827b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* rgb_buf, 2828b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard const struct YuvConstants* yuvconstants, 2829b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 2830b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int x; 2831b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 src0, src1, src2; 2832b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v8i16 vec0, vec1, vec2; 2833b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 2834b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v4i32 vec_ubvr, vec_ugvg; 2835b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard v16u8 alpha = 
(v16u8)__msa_ldi_b(ALPHA_VAL); 2836b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2837b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 2838b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_br, vec_yg); 2839b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); 2840b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); 2841b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard 2842b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard for (x = 0; x < width; x += 8) { 2843b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src0 = (v16u8)__msa_ld_b((v16i8*)src_uyvy, 0); 2844b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); 2845b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); 2846b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, 2847b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard vec0, vec1, vec2); 2848b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard STOREARGB(vec0, vec1, vec2, alpha, rgb_buf); 2849b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard src_uyvy += 16; 2850b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard rgb_buf += 32; 2851b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard } 2852b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard} 2853b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard

// Blends the row at src_ptr with the row at src_ptr + src_stride and writes
// the result to dst_ptr.
//
//   dst_ptr            destination row.
//   src_ptr            first source row; the second row is src_stride bytes
//                      further on.
//   width              number of bytes to produce. Both vector loops step by
//                      32 with the condition x < width, so a width that is not
//                      a multiple of 32 still processes a whole final 32-byte
//                      group.
//   source_y_fraction  weight of the second row out of 256; the first row gets
//                      256 - source_y_fraction.
//                      NOTE(review): the general path packs both weights into
//                      one byte each, which assumes a value in [0, 255] -
//                      confirm against callers.
void InterpolateRow_MSA(uint8* dst_ptr,
                        const uint8* src_ptr,
                        ptrdiff_t src_stride,
                        int width,
                        int32 source_y_fraction) {
  int32 y1_fraction = source_y_fraction;
  int32 y0_fraction = 256 - y1_fraction;
  uint16 y_fractions;
  const uint8* s = src_ptr;
  const uint8* t = src_ptr + src_stride;
  int x;
  v16u8 src0, src1, src2, src3, dst0, dst1;
  v8u16 vec0, vec1, vec2, vec3, y_frac;

  // Fraction 0: the second row contributes nothing, so copy exactly `width`
  // bytes of the first row.
  if (0 == y1_fraction) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }

  // Fraction 128: equal weights, handled with a per-byte unsigned average
  // (aver_u_b) instead of the multiply path below.
  if (128 == y1_fraction) {
    for (x = 0; x < width; x += 32) {
      src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
      src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
      src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
      src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
      dst0 = __msa_aver_u_b(src0, src2);
      dst1 = __msa_aver_u_b(src1, src3);
      ST_UB2(dst0, dst1, dst_ptr, 16);
      s += 32;
      t += 32;
      dst_ptr += 32;
    }
    return;
  }

  // General case: put the first-row weight in the low byte and the second-row
  // weight in the high byte of every halfword, so one byte-pair dot product
  // yields s * y0 + t * y1 for each pixel.
  y_fractions = (uint16)(y0_fraction + (y1_fraction << 8));
  y_frac = (v8u16)__msa_fill_h(y_fractions);

  for (x = 0; x < width; x += 32) {
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
    src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
    // Interleave the two rows so each halfword holds one byte from the first
    // row paired with the byte at the same position in the second row.
    vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
    vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
    vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
    vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
    vec0 = (v8u16)__msa_dotp_u_h((v16u8)vec0, (v16u8)y_frac);
    vec1 = (v8u16)__msa_dotp_u_h((v16u8)vec1, (v16u8)y_frac);
    vec2 = (v8u16)__msa_dotp_u_h((v16u8)vec2, (v16u8)y_frac);
    vec3 = (v8u16)__msa_dotp_u_h((v16u8)vec3, (v16u8)y_frac);
    // Rounding shift right by 8 scales the weighted sums back to byte range;
    // pckev_b then keeps the low byte of each halfword.
    vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 8);
    vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 8);
    vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 8);
    vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 8);
    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
    dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
    ST_UB2(dst0, dst1, dst_ptr, 16);
    s += 32;
    t += 32;
    dst_ptr += 32;
  }
}

// Fills a row with the 32-bit value v32.
// v32 is replicated into the four word lanes of one vector, and that vector is
// stored once per iteration. Each iteration advances dst_argb by 16 bytes for
// 4 units of width, i.e. 4 bytes per unit; a width that is not a multiple of 4
// still gets a whole final 16-byte store.
void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) {
  int x;
  v16u8 dst0 = (v16u8)__msa_fill_w(v32);

  for (x = 0; x < width; x += 4) {
    ST_UB(dst0, dst_argb);
    dst_argb += 16;
  }
}

// Converts a row of 3-byte pixels from src_raw to dst_rgb24 by reversing the
// byte order inside every 3-byte group (see the shuffle tables: 2,1,0, 5,4,3,
// ...). Each iteration consumes and produces 48 bytes for 16 units of width.
void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width) {
  int x;
  v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2;
  // Shuffle indices 0..15 select from the last vshf_b operand and 16..31 from
  // the middle operand. A 3-byte group can straddle a 16-byte vector, hence
  // the indices above 15 and the shifted windows built with sldi_b below.
  v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17};
  v16i8 shuffler1 = {8,  7,  12, 11, 10, 15, 14, 13,
                     18, 17, 16, 21, 20, 19, 24, 23};
  v16i8 shuffler2 = {14, 19, 18, 17, 22, 21, 20, 25,
                     24, 23, 28, 27, 26, 31, 30, 29};

  for (x = 0; x < width; x += 16) {
    src0 = (v16u8)__msa_ld_b((v16i8*)src_raw, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_raw, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)src_raw, 32);
    // src3 / src4 are 16-byte windows starting at input bytes 8 and 24.
    src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8);
    src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
    dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
    dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src4, (v16i8)src3);
    dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src2, (v16i8)src1);
    ST_UB2(dst0, dst1, dst_rgb24, 16);
    ST_UB(dst2, (dst_rgb24 + 32));
    src_raw += 48;
    dst_rgb24 += 48;
  }
}

// Interleaves a row of U bytes and a row of V bytes into dst_uv as
// U0 V0 U1 V1 ... Each iteration reads 16 bytes from each source and writes
// 32 bytes; width is counted in U/V pairs and is stepped by 16.
void MergeUVRow_MSA(const uint8* src_u,
                    const uint8* src_v,
                    uint8* dst_uv,
                    int width) {
  int x;
  v16u8 src0, src1, dst0, dst1;

  for (x = 0; x < width; x += 16) {
    src0 = (v16u8)__msa_ld_b((v16i8*)src_u, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_v, 0);
    // ilvr_b / ilvl_b interleave the low and high 8 bytes respectively, with
    // the U byte placed before its matching V byte.
    dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0);
    dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0);
    ST_UB2(dst0, dst1, dst_uv, 16);
    src_u += 16;
    src_v += 16;
    dst_uv += 32;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)