Lines Matching refs:out0

144  *               Outputs - out0, out1, out2, out3
145 * Details : Load word in 'out0' from (psrc)
150 #define LW4(psrc, stride, out0, out1, out2, out3) do { \
152 out0 = LW(ptmp); \
215 * Outputs - out0, out1
217 * Details : Load 16 byte elements in 'out0' from (psrc)
220 #define LD_B2(RTYPE, psrc, stride, out0, out1) do { \
221 out0 = LD_B(RTYPE, psrc); \
227 #define LD_B3(RTYPE, psrc, stride, out0, out1, out2) do { \
228 LD_B2(RTYPE, psrc, stride, out0, out1); \
234 #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) do { \
235 LD_B2(RTYPE, psrc, stride, out0, out1); \
242 out0, out1, out2, out3, out4, out5, out6, out7) do { \
243 LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3); \
251 * Outputs - out0, out1
252 * Details : Load 8 halfword elements in 'out0' from (psrc)
255 #define LD_H2(RTYPE, psrc, stride, out0, out1) do { \
256 out0 = LD_H(RTYPE, psrc); \
264 * Outputs - out0, out1, out2, out3
265 * Details : Load 4 word elements in 'out0' from (psrc + 0 * stride)
270 #define LD_W2(RTYPE, psrc, stride, out0, out1) do { \
271 out0 = LD_W(RTYPE, psrc); \
277 #define LD_W3(RTYPE, psrc, stride, out0, out1, out2) do { \
278 LD_W2(RTYPE, psrc, stride, out0, out1); \
284 #define LD_W4(RTYPE, psrc, stride, out0, out1, out2, out3) do { \
285 LD_W2(RTYPE, psrc, stride, out0, out1); \
425 * Outputs - out0, out1
428 * 'out0' as per control vector 'mask0'
438 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) do { \
439 out0 = VSHF_B(RTYPE, in0, in1, mask0); \
449 * Outputs - out0, out1
452 * 'out0' as per control vector 'mask0'
454 #define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) do { \
455 out0 = (RTYPE)__msa_vshf_h((v8i16)mask0, (v8i16)in1, (v8i16)in0); \
463 * Outputs - out0, out1
469 * are added together and written to the 'out0' vector
471 #define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) do { \
472 out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \
479 * Outputs - out0, out1
485 * are added together and written to the 'out0' vector
487 #define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) do { \
488 out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
495 * Outputs - out0, out1
501 * are added together and written to the 'out0' vector
503 #define DOTP_UW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) do { \
504 out0 = (RTYPE)__msa_dotp_u_d((v4u32)mult0, (v4u32)cnst0); \
511 * Outputs - out0, out1
517 * are added to the 'out0' vector
519 #define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) do { \
520 out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \
634 Outputs - out0, out1
638 halfword result is written in 'out0'
640 #define HADD_SH2(RTYPE, in0, in1, out0, out1) do { \
641 out0 = (RTYPE)__msa_hadd_s_w((v8i16)in0, (v8i16)in0); \
646 #define HADD_SH4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) do { \
647 HADD_SH2(RTYPE, in0, in1, out0, out1); \
654 * Outputs - out0, out1
658 * halfword result is written to 'out0'
660 #define HSUB_UB2(RTYPE, in0, in1, out0, out1) do { \
661 out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \
707 * Outputs - out0, out1
710 * and written to 'out0'
712 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
713 out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
724 * Outputs - out0, out1
727 * and written to 'out0'
729 #define ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
730 out0 = (RTYPE)__msa_ilvod_b((v16i8)in1, (v16i8)in0); \
741 * Outputs - out0, out1
744 * and written to 'out0'
746 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
747 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
757 * Outputs - out0, out1
760 * and written to 'out0'
762 #define ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
763 out0 = (RTYPE)__msa_ilvod_h((v8i16)in1, (v8i16)in0); \
773 * Outputs - out0, out1
776 * and written to 'out0'
778 #define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
779 out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \
789 * Outputs - out0, out1
792 * and written to 'out0'
796 #define ILVEVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
797 out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \
807 * Outputs - out0, out1
810 * and written to 'out0'
814 #define ILVEVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
815 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
825 * Outputs - out0, out1
828 * and written to 'out0'
830 #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
831 out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \
841 * Outputs - out0, out1
844 * and written to 'out0'.
846 #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
847 out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
858 * Outputs - out0, out1
861 * and written to out0.
863 #define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
864 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
874 out0, out1, out2, out3) do { \
875 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
886 * Outputs - out0, out1
889 * interleaved and written to 'out0'.
891 #define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
892 out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
900 out0, out1, out2, out3) do { \
901 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
910 * Outputs - out0, out1
913 * interleaved and written to 'out0'.
915 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
916 out0 = (RTYPE)__msa_ilvr_d((v2i64)in0, (v2i64)in1); \
924 out0, out1, out2, out3) do { \
925 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
933 * Outputs - out0, out1
936 * interleaved and written to 'out0'
938 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) do { \
939 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
948 #define ILVRL_H2(RTYPE, in0, in1, out0, out1) do { \
949 out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
958 #define ILVRL_W2(RTYPE, in0, in1, out0, out1) do { \
959 out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
969 * Outputs - out0, out1
972 * 'out0' & even byte elements of 'in1' are copied to the right
973 * half of 'out0'.
975 #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
976 out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
985 out0, out1, out2, out3) do { \
986 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
996 * Outputs - out0, out1
999 * 'out0' & even halfword elements of 'in1' are copied to the
1000 * right half of 'out0'.
1002 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
1003 out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \
1013 * Outputs - out0, out1
1016 * 'out0' & even word elements of 'in1' are copied to the
1017 * right half of 'out0'.
1019 #define PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
1020 out0 = (RTYPE)__msa_pckev_w((v4i32)in0, (v4i32)in1); \
1030 * Outputs - out0, out1
1033 * 'out0' & odd halfword elements of 'in1' are copied to the
1034 * right half of 'out0'.
1036 #define PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
1037 out0 = (RTYPE)__msa_pckod_h((v8i16)in0, (v8i16)in1); \
1128 * Outputs - out0, out1
1130 * to 'out0'.
1132 #define ADDVI_H2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
1133 out0 = (RTYPE)ADDVI_H(in0, in1); \
1141 * Outputs - out0, out1
1143 * to 'out0'.
1145 #define ADDVI_W2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
1146 out0 = (RTYPE)ADDVI_W(in0, in1); \
1153 * Outputs - out0, out1
1154 * Details : GP register in0 is replicated in each word element of out0
1157 #define FILL_W2(RTYPE, in0, in1, out0, out1) do { \
1158 out0 = (RTYPE)__msa_fill_w(in0); \
1165 * Outputs - out0, out1
1167 * to 'out0'.
1169 #define ADD2(in0, in1, in2, in3, out0, out1) do { \
1170 out0 = in0 + in1; \
1175 out0, out1, out2, out3) do { \
1176 ADD2(in0, in1, in2, in3, out0, out1); \
1182 * Outputs - out0, out1
1184 * written to 'out0'.
1186 #define SUB2(in0, in1, in2, in3, out0, out1) do { \
1187 out0 = in0 - in1; \
1191 #define SUB3(in0, in1, in2, in3, in4, in5, out0, out1, out2) do { \
1192 out0 = in0 - in1; \
1198 out0, out1, out2, out3) do { \
1199 out0 = in0 - in1; \
1207 * Outputs - out0, out1
1209 * written to 'out0'.
1213 #define ADDSUB2(in0, in1, out0, out1) do { \
1214 out0 = in0 + in1; \
1220 * Outputs - out0, out1
1222 * and the result is written to 'out0'
1224 #define MUL2(in0, in1, in2, in3, out0, out1) do { \
1225 out0 = in0 * in1; \
1230 out0, out1, out2, out3) do { \
1231 MUL2(in0, in1, in2, in3, out0, out1); \
1251 * Outputs - out0, out1 (sign extended word vectors)
1255 * generate 4 signed word elements in 'out0'
1259 #define UNPCK_SH_SW(in, out0, out1) do { \
1261 ILVRL_H2_SW(tmp_m, in, out0, out1); \
1266 * Outputs - out0, out1, out2, out3
1269 #define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) do { \
1270 out0 = in0 + in3; \
1279 * Outputs - out0, out1, out2, out3
1284 out0, out1, out2, out3) do { \
1293 ILVEVOD_H2_UB(tmp0_m, tmp1_m, tmp0_m, tmp1_m, out0, out2); \
1301 * Outputs - out0, out1, out2, out3, out4, out5, out6, out7
1306 out0, out1, out2, out3, out4, out5, \
1313 ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \
1316 ILVEV_B2_UB(out3, out2, out1, out0, out5, out7); \
1317 ILVOD_B2_SH(out3, out2, out1, out0, tmp6_m, tmp7_m); \
1319 ILVEVOD_W2_UB(tmp2_m, tmp3_m, tmp2_m, tmp3_m, out0, out4); \
1330 * Outputs - out0, out1, out2, out3
1334 out0, out1, out2, out3) do { \
1338 out0 = (RTYPE)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \
1380 * Outputs - out0, out1
1384 * with rounding is calculated and written to 'out0'
1386 #define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) do { \
1387 out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \