/*
 *  Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
11b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
12b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#define INCLUDE_LIBYUV_MACROS_MSA_H_
13b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
14b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
15b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#include <msa.h>
16b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#include <stdint.h>
17b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
18b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#if (__mips_isa_rev >= 6)
/* Description : Load one 32-bit word from memory with a single 'lw'
                 (variant selected when __mips_isa_rev >= 6)
   Arguments   : Input  - psrc (address of the first of the 4 bytes to read)
                 Return Type - uint32_t
   Details     : The memory operand is typed as a 4-byte array so the
                 compiler is told the full extent the instruction reads,
                 not only the first byte. 'psrc' is evaluated once.
*/
#define LW(psrc)                                                      \
  ({                                                                  \
    const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); /* NOLINT */   \
    uint32_t val_m;                                                   \
    asm volatile("lw  %[val_m],  %[psrc_lw_m]  \n"                    \
                 : [val_m] "=r"(val_m)                                \
                 : [psrc_lw_m] "m"(*(const uint8_t(*)[4])psrc_lw_m)); \
    val_m;                                                            \
  })
28b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
29b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#if (__mips == 64)
/* Description : Load one 64-bit doubleword from memory with a single 'ld'
                 (variant selected when __mips_isa_rev >= 6 and __mips == 64)
   Arguments   : Input  - psrc (address of the first of the 8 bytes to read)
                 Return Type - uint64_t
   Details     : The memory operand is typed as an 8-byte array so the
                 compiler is told the full extent the instruction reads,
                 not only the first byte. 'psrc' is evaluated once.
*/
#define LD(psrc)                                                      \
  ({                                                                  \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); /* NOLINT */   \
    uint64_t val_m = 0;                                               \
    asm volatile("ld  %[val_m],  %[psrc_ld_m]  \n"                    \
                 : [val_m] "=r"(val_m)                                \
                 : [psrc_ld_m] "m"(*(const uint8_t(*)[8])psrc_ld_m)); \
    val_m;                                                            \
  })
39b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#else  // !(__mips == 64)
/* Description : Load 64 bits from memory as two 32-bit LW() loads
                 (variant selected when __mips is not 64)
   Arguments   : Input  - psrc (address of the first of the 8 bytes to read)
                 Return Type - uint64_t
   Details     : The word at (psrc) becomes bits 0..31 of the result and the
                 word at (psrc + 4) becomes bits 32..63.
                 'psrc' is evaluated once.
*/
#define LD(psrc)                                                    \
  ({                                                                \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); /* NOLINT */ \
    uint32_t val0_m = LW(psrc_ld_m);                                \
    uint32_t val1_m = LW(psrc_ld_m + 4);                            \
    ((uint64_t)val1_m << 32) | (uint64_t)val0_m;                    \
  })
52b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif  // (__mips == 64)
53b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store one 32-bit word to memory with a single 'sw'
                 (variant selected when __mips_isa_rev >= 6)
   Arguments   : Inputs - val (value, converted to uint32_t),
                          pdst (address of the first of the 4 bytes written)
   Details     : The memory operand is typed as a 4-byte array so the
                 compiler is told the full extent the instruction writes,
                 not only the first byte. Each argument is evaluated once.
*/
#define SW(val, pdst)                                          \
  ({                                                           \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */        \
    uint32_t val_m = (val);                                    \
    asm volatile("sw  %[val_m],  %[pdst_sw_m]  \n"             \
                 : [pdst_sw_m] "=m"(*(uint8_t(*)[4])pdst_sw_m) \
                 : [val_m] "r"(val_m));                        \
  })
62b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
63b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#if (__mips == 64)
/* Description : Store one 64-bit doubleword to memory with a single 'sd'
                 (variant selected when __mips_isa_rev >= 6 and __mips == 64)
   Arguments   : Inputs - val (value, converted to uint64_t),
                          pdst (address of the first of the 8 bytes written)
   Details     : The memory operand is typed as an 8-byte array so the
                 compiler is told the full extent the instruction writes,
                 not only the first byte. Each argument is evaluated once.
*/
#define SD(val, pdst)                                          \
  ({                                                           \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */        \
    uint64_t val_m = (val);                                    \
    asm volatile("sd  %[val_m],  %[pdst_sd_m]  \n"             \
                 : [pdst_sd_m] "=m"(*(uint8_t(*)[8])pdst_sd_m) \
                 : [val_m] "r"(val_m));                        \
  })
72b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#else  // !(__mips == 64)
/* Description : Store 64 bits to memory as two 32-bit SW() stores
                 (variant selected when __mips is not 64)
   Arguments   : Inputs - val (value, converted to uint64_t),
                          pdst (address of the first of the 8 bytes written)
   Details     : Bits 0..31 of 'val' are stored at (pdst) and bits 32..63 at
                 (pdst + 4). 'val' is captured once in a 64-bit temporary, so
                 an argument with side effects is evaluated a single time and
                 the 32-bit shift is well defined for narrower argument types.
*/
#define SD(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    const uint64_t val_sd_m = (uint64_t)(val);          \
    SW((uint32_t)(val_sd_m & 0xFFFFFFFFu), pdst_sd_m);  \
    SW((uint32_t)(val_sd_m >> 32), pdst_sd_m + 4);      \
  })
82b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif  // !(__mips == 64)
83b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#else   // !(__mips_isa_rev >= 6)
/* Description : Load one 32-bit word from memory using the 'ulw' mnemonic
                 (variant selected when __mips_isa_rev < 6)
   Arguments   : Input  - psrc (address of the first of the 4 bytes to read)
                 Return Type - uint32_t
   Details     : The memory operand is typed as a 4-byte array so the
                 compiler is told the full extent the instruction reads,
                 not only the first byte. 'psrc' is evaluated once.
*/
#define LW(psrc)                                                      \
  ({                                                                  \
    const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); /* NOLINT */   \
    uint32_t val_m;                                                   \
    asm volatile("ulw  %[val_m],  %[psrc_lw_m]  \n"                   \
                 : [val_m] "=r"(val_m)                                \
                 : [psrc_lw_m] "m"(*(const uint8_t(*)[4])psrc_lw_m)); \
    val_m;                                                            \
  })
93b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
94b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#if (__mips == 64)
/* Description : Load one 64-bit doubleword from memory using the 'uld'
                 mnemonic (variant selected when __mips_isa_rev < 6 and
                 __mips == 64)
   Arguments   : Input  - psrc (address of the first of the 8 bytes to read)
                 Return Type - uint64_t
   Details     : The memory operand is typed as an 8-byte array so the
                 compiler is told the full extent the instruction reads,
                 not only the first byte. 'psrc' is evaluated once.
*/
#define LD(psrc)                                                      \
  ({                                                                  \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); /* NOLINT */   \
    uint64_t val_m = 0;                                               \
    asm volatile("uld  %[val_m],  %[psrc_ld_m]  \n"                   \
                 : [val_m] "=r"(val_m)                                \
                 : [psrc_ld_m] "m"(*(const uint8_t(*)[8])psrc_ld_m)); \
    val_m;                                                            \
  })
104b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#else  // !(__mips == 64)
/* Description : Load 64 bits from memory as two 32-bit LW() loads
                 (variant selected when __mips is not 64)
   Arguments   : Input  - psrc (address of the first of the 8 bytes to read)
                 Return Type - uint64_t
   Details     : The word at (psrc) becomes bits 0..31 of the result and the
                 word at (psrc + 4) becomes bits 32..63.
                 'psrc' is evaluated once.
*/
#define LD(psrc)                                                    \
  ({                                                                \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); /* NOLINT */ \
    uint32_t val0_m = LW(psrc_ld_m);                                \
    uint32_t val1_m = LW(psrc_ld_m + 4);                            \
    ((uint64_t)val1_m << 32) | (uint64_t)val0_m;                    \
  })
117b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif  // (__mips == 64)
118b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store one 32-bit word to memory using the 'usw' mnemonic
                 (variant selected when __mips_isa_rev < 6)
   Arguments   : Inputs - val (value, converted to uint32_t),
                          pdst (address of the first of the 4 bytes written)
   Details     : The memory operand is typed as a 4-byte array so the
                 compiler is told the full extent the instruction writes,
                 not only the first byte. Each argument is evaluated once.
*/
#define SW(val, pdst)                                          \
  ({                                                           \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */        \
    uint32_t val_m = (val);                                    \
    asm volatile("usw  %[val_m],  %[pdst_sw_m]  \n"            \
                 : [pdst_sw_m] "=m"(*(uint8_t(*)[4])pdst_sw_m) \
                 : [val_m] "r"(val_m));                        \
  })
127b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store 64 bits to memory as two 32-bit SW() stores
                 (variant selected when __mips_isa_rev < 6)
   Arguments   : Inputs - val (value, converted to uint64_t),
                          pdst (address of the first of the 8 bytes written)
   Details     : Bits 0..31 of 'val' are stored at (pdst) and bits 32..63 at
                 (pdst + 4). 'val' is captured once in a 64-bit temporary, so
                 an argument with side effects is evaluated a single time and
                 the 32-bit shift is well defined for narrower argument types.
*/
#define SD(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    const uint64_t val_sd_m = (uint64_t)(val);          \
    SW((uint32_t)(val_sd_m & 0xFFFFFFFFu), pdst_sd_m);  \
    SW((uint32_t)(val_sd_m >> 32), pdst_sd_m + 4);      \
  })
137b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif  // (__mips_isa_rev >= 6)
138b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
// TODO(fbarchard): Consider removing __VA_ARGS__ versions.
/* Description : Load one value of type RTYPE from memory
   Arguments   : Inputs  - RTYPE (type to load), psrc (source address)
                 Return Type - as per RTYPE
   Details     : Dereferences (psrc) as a pointer to RTYPE. The expansion is
                 fully parenthesized so it can be used as an operand of any
                 surrounding expression. LD_UB fixes RTYPE to v16u8.
*/
#define LD_B(RTYPE, psrc) (*((RTYPE*)(psrc))) /* NOLINT */
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
142b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store one value of type RTYPE to memory
   Arguments   : Inputs - RTYPE (type to store), in (value), pdst (address)
   Details     : Assigns (in) through (pdst) viewed as a pointer to RTYPE.
                 The expansion is fully parenthesized so trailing operators
                 at the call site cannot be absorbed into the stored value.
                 ST_UB fixes RTYPE to v16u8.
*/
#define ST_B(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in)) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
145b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store one value of type RTYPE to memory
   Arguments   : Inputs - RTYPE (type to store), in (value), pdst (address)
   Details     : Assigns (in) through (pdst) viewed as a pointer to RTYPE.
                 The expansion is fully parenthesized so trailing operators
                 at the call site cannot be absorbed into the stored value.
                 ST_UH fixes RTYPE to v8u16.
*/
#define ST_H(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in)) /* NOLINT */
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
148b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
                 'stride' may be any expression; 'psrc' and 'stride' are
                 expanded more than once, so avoid side effects in them.
                 Expands to a brace-enclosed block, not an expression.
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
  {                                            \
    (out0) = LD_B(RTYPE, (psrc));              \
    (out1) = LD_B(RTYPE, (psrc) + (stride));   \
  }
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
162b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Load four vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1, out2, out3
                 Return Type - as per RTYPE
   Details     : Loads 'out0'..'out3' from (psrc), (psrc + stride),
                 (psrc + 2 * stride) and (psrc + 3 * stride) via two LD_B2
                 expansions. 'stride' may be any expression; 'psrc' and
                 'stride' are expanded more than once, so avoid side effects.
*/
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3)     \
  {                                                            \
    LD_B2(RTYPE, (psrc), (stride), out0, out1);                \
    LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3); \
  }
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
169b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
                 'stride' may be any expression; 'pdst' and 'stride' are
                 expanded more than once, so avoid side effects in them.
                 Expands to a brace-enclosed block, not an expression.
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_B(RTYPE, (in0), (pdst));              \
    ST_B(RTYPE, (in1), (pdst) + (stride));   \
  }
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
182b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store four vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
   Details     : Stores 'in0'..'in3' to (pdst), (pdst + stride),
                 (pdst + 2 * stride) and (pdst + 3 * stride) via two ST_B2
                 expansions. 'stride' may be any expression; 'pdst' and
                 'stride' are expanded more than once, so avoid side effects.
*/
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride)       \
  {                                                          \
    ST_B2(RTYPE, in0, in1, (pdst), (stride));                \
    ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \
  }
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
189b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Store vectors of 8 halfword elements with stride
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 8 halfword elements from 'in0' to (pdst)
                 Store 8 halfword elements from 'in1' to (pdst + stride)
                 'stride' may be any expression; 'pdst' and 'stride' are
                 expanded more than once, so avoid side effects in them.
                 Expands to a brace-enclosed block, not an expression.
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_H(RTYPE, (in0), (pdst));              \
    ST_H(RTYPE, (in1), (pdst) + (stride));   \
  }
#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
201b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
                 Byte elements from 'in2' & 'in3' are copied selectively to
                 'out1' as per control vector 'mask1'
                 Every argument is parenthesized before being cast to v16i8,
                 so expression arguments are cast as a whole.
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)          \
  {                                                                           \
    (out0) = (RTYPE)__msa_vshf_b((v16i8)(mask0), (v16i8)(in1), (v16i8)(in0)); \
    (out1) = (RTYPE)__msa_vshf_b((v16i8)(mask1), (v16i8)(in3), (v16i8)(in2)); \
  }
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
216b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
/* Description : Interleave both left and right half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out0'
                 Left half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out1'
                 Every argument is parenthesized before being cast to v16i8,
                 so expression arguments are cast as a whole.
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1)                 \
  {                                                           \
    (out0) = (RTYPE)__msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \
    (out1) = (RTYPE)__msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
230b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
231b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
232b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard
233b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard#endif  // INCLUDE_LIBYUV_MACROS_MSA_H_
234