/*
 *  Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
#define INCLUDE_LIBYUV_MACROS_MSA_H_

#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include <msa.h>
#include <stdint.h>

/* Scalar load/store helpers.

   LW(psrc)      : load a 32-bit word from 'psrc' and return it as uint32_t.
   LD(psrc)      : load a 64-bit doubleword from 'psrc' and return it as
                   uint64_t. When a single 64-bit load is not used, the value
                   is assembled from two LW() calls, with the word at the
                   lower address placed in the low 32 bits.
   SW(val, pdst) : store the 32-bit value 'val' to 'pdst'.
   SD(val, pdst) : store the 64-bit value 'val' to 'pdst'. When a single
                   64-bit store is not used, the low 32 bits are stored at
                   'pdst' and the high 32 bits at 'pdst' + 4.

   Every macro evaluates each of its arguments exactly once.

   For MIPS ISA revision 6 and later the plain lw/ld/sw/sd instructions are
   emitted; for earlier revisions the ulw/uld/usw forms are emitted instead.
   NOTE(review): the asm memory operand is declared through a uint8_t lvalue
   although the instruction accesses 4 or 8 bytes - confirm this is sufficient
   for the compilers in use.
*/
#if (__mips_isa_rev >= 6)
#define LW(psrc)                                       \
  ({                                                   \
    uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
    uint32_t val_m;                                    \
    asm volatile("lw  %[val_m],  %[psrc_lw_m]  \n"     \
                 : [val_m] "=r"(val_m)                 \
                 : [psrc_lw_m] "m"(*psrc_lw_m));       \
    val_m;                                             \
  })

#if (__mips == 64)
#define LD(psrc)                                       \
  ({                                                   \
    uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
    uint64_t val_m = 0;                                \
    asm volatile("ld  %[val_m],  %[psrc_ld_m]  \n"     \
                 : [val_m] "=r"(val_m)                 \
                 : [psrc_ld_m] "m"(*psrc_ld_m));       \
    val_m;                                             \
  })
#else  // !(__mips == 64)
#define LD(psrc)                                                  \
  ({                                                              \
    uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */           \
    uint32_t val0_m, val1_m;                                      \
    uint64_t val_m;                                               \
    val0_m = LW(psrc_ld_m);                                       \
    val1_m = LW(psrc_ld_m + 4);                                   \
    val_m = ((uint64_t)val1_m << 32) | (uint64_t)val0_m; /* NOLINT */ \
    val_m;                                                        \
  })
#endif  // (__mips == 64)

#define SW(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
    uint32_t val_m = (val);                             \
    asm volatile("sw  %[val_m],  %[pdst_sw_m]  \n"      \
                 : [pdst_sw_m] "=m"(*pdst_sw_m)         \
                 : [val_m] "r"(val_m));                 \
  })

#if (__mips == 64)
#define SD(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    uint64_t val_m = (val);                             \
    asm volatile("sd  %[val_m],  %[pdst_sd_m]  \n"      \
                 : [pdst_sd_m] "=m"(*pdst_sd_m)         \
                 : [val_m] "r"(val_m));                 \
  })
#else  // !(__mips == 64)
/* 'val' is captured once so that an argument with side effects is not
   evaluated twice, and widened so the shift by 32 is always well defined. */
#define SD(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    const uint64_t val_sd_m = (uint64_t)(val);          \
    uint32_t val0_m, val1_m;                            \
    val0_m = (uint32_t)(val_sd_m & 0xFFFFFFFFu);        \
    val1_m = (uint32_t)(val_sd_m >> 32);                \
    SW(val0_m, pdst_sd_m);                              \
    SW(val1_m, pdst_sd_m + 4);                          \
  })
#endif  // (__mips == 64)
#else   // !(__mips_isa_rev >= 6)
#define LW(psrc)                                       \
  ({                                                   \
    uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
    uint32_t val_m;                                    \
    asm volatile("ulw  %[val_m],  %[psrc_lw_m]  \n"    \
                 : [val_m] "=r"(val_m)                 \
                 : [psrc_lw_m] "m"(*psrc_lw_m));       \
    val_m;                                             \
  })

#if (__mips == 64)
#define LD(psrc)                                       \
  ({                                                   \
    uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
    uint64_t val_m = 0;                                \
    asm volatile("uld  %[val_m],  %[psrc_ld_m]  \n"    \
                 : [val_m] "=r"(val_m)                 \
                 : [psrc_ld_m] "m"(*psrc_ld_m));       \
    val_m;                                             \
  })
#else  // !(__mips == 64)
#define LD(psrc)                                                  \
  ({                                                              \
    uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */           \
    uint32_t val0_m, val1_m;                                      \
    uint64_t val_m;                                               \
    val0_m = LW(psrc_ld_m);                                       \
    val1_m = LW(psrc_ld_m + 4);                                   \
    val_m = ((uint64_t)val1_m << 32) | (uint64_t)val0_m; /* NOLINT */ \
    val_m;                                                        \
  })
#endif  // (__mips == 64)

#define SW(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
    uint32_t val_m = (val);                             \
    asm volatile("usw  %[val_m],  %[pdst_sw_m]  \n"     \
                 : [pdst_sw_m] "=m"(*pdst_sw_m)         \
                 : [val_m] "r"(val_m));                 \
  })

/* Always stored as two 32-bit words on this path. 'val' is captured once so
   that an argument with side effects is not evaluated twice, and widened so
   the shift by 32 is always well defined. */
#define SD(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    const uint64_t val_sd_m = (uint64_t)(val);          \
    uint32_t val0_m, val1_m;                            \
    val0_m = (uint32_t)(val_sd_m & 0xFFFFFFFFu);        \
    val1_m = (uint32_t)(val_sd_m >> 32);                \
    SW(val0_m, pdst_sd_m);                              \
    SW(val1_m, pdst_sd_m + 4);                          \
  })
#endif  // (__mips_isa_rev >= 6)

// TODO(fbarchard): Consider removing __VA_ARGS__ versions.
/* Description : Load / store one vector through a pointer cast to RTYPE
   Arguments   : LD_B - Input  - psrc
                        Return Type - as per RTYPE
                 ST_B, ST_H - Inputs - in, pdst
   Details     : LD_B dereferences (psrc) as an RTYPE object.
                 ST_B / ST_H assign 'in' to (pdst) viewed as an RTYPE object.
*/
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)

#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)

#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)

/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
  {                                            \
    out0 = LD_B(RTYPE, (psrc));                \
    out1 = LD_B(RTYPE, (psrc) + (stride));     \
  }
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)

/* Description : Load four vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1, out2, out3
                 Return Type - as per RTYPE
   Details     : 'out0'..'out3' are loaded from (psrc), (psrc + stride),
                 (psrc + 2 * stride) and (psrc + 3 * stride) respectively
*/
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3)     \
  {                                                            \
    LD_B2(RTYPE, (psrc), (stride), out0, out1);                \
    LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3); \
  }
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)

/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_B(RTYPE, in0, (pdst));                \
    ST_B(RTYPE, in1, (pdst) + (stride));     \
  }
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)

/* Description : Store four vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
   Details     : 'in0'..'in3' are stored to (pdst), (pdst + stride),
                 (pdst + 2 * stride) and (pdst + 3 * stride) respectively
*/
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride)       \
  {                                                          \
    ST_B2(RTYPE, in0, in1, (pdst), (stride));                \
    ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \
  }
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)

/* Description : Store vectors of 8 halfword elements with stride
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 8 halfword elements from 'in0' to (pdst)
                 Store 8 halfword elements from 'in1' to (pdst + stride)
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_H(RTYPE, in0, (pdst));                \
    ST_H(RTYPE, in1, (pdst) + (stride));     \
  }
#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)

// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
                 Byte elements from 'in2' & 'in3' are copied selectively to
                 'out1' as per control vector 'mask1'
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)        \
  {                                                                         \
    out0 = (RTYPE)__msa_vshf_b((v16i8)(mask0), (v16i8)(in1), (v16i8)(in0)); \
    out1 = (RTYPE)__msa_vshf_b((v16i8)(mask1), (v16i8)(in3), (v16i8)(in2)); \
  }
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)

/* Description : Interleave both left and right half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out0'
                 Left half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out1'
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1)               \
  {                                                         \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)

#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */

#endif  // INCLUDE_LIBYUV_MACROS_MSA_H_