Lines matching defs:q2 (occurrences of q2 in what appears to be libwebp's ARM NEON decoder, src/dsp/dec_neon.c; each match is prefixed with its source line number)

117 uint8x16_t* const q2, uint8x16_t* const q3) {
119 Load4x16_NEON(src + 2, stride, q0, q1, q2, q3);
137 uint8x16_t* const q2, uint8x16_t* const q3) {
139 Load16x4_NEON(src + 2 * stride, stride, q0, q1, q2, q3);
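The matches at 117/119 and 137/139 show the two 8-line loaders splitting their work in half: Load4x16_NEON gathers four pixel columns on either side of a vertical edge, while Load16x4_NEON gathers four full 16-byte rows around a horizontal one. A minimal sketch of the row-load half, assuming only that the caller passes src pointing two rows below the edge (as the + 2 * stride at 139 suggests); the column loader needs a transpose and is not sketched here:

#include <arm_neon.h>

/* Hedged sketch of a Load16x4-style helper: four consecutive 16-pixel
 * rows land in one q register each. The offsets are assumptions
 * inferred from the call shape at line 139. */
static void Load16x4_sketch(const uint8_t* const src, int stride,
                            uint8x16_t* const q0, uint8x16_t* const q1,
                            uint8x16_t* const q2, uint8x16_t* const q3) {
  *q0 = vld1q_u8(src - 2 * stride);
  *q1 = vld1q_u8(src - 1 * stride);
  *q2 = vld1q_u8(src + 0 * stride);
  *q3 = vld1q_u8(src + 1 * stride);
}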
146 uint8x16_t* const q2, uint8x16_t* const q3) {
155 *q2 = vcombine_u8(vld1_u8(u + 2 * stride), vld1_u8(v + 2 * stride));
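Line 155 pairs one 8-pixel chroma row from the u plane with the matching row from the v plane in a single 16-lane register, so both planes get filtered in one pass. A self-contained sketch of that packing (the function name is illustrative):

#include <arm_neon.h>

/* Hedged sketch of the u/v pairing at line 155: low half <- u row,
 * high half <- v row. The *q2 assignment in the listing is this
 * pattern with row == 2. */
static uint8x16_t LoadUVRow_sketch(const uint8_t* const u,
                                   const uint8_t* const v,
                                   int stride, int row) {
  return vcombine_u8(vld1_u8(u + row * stride), vld1_u8(v + row * stride));
}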
168 uint8x16_t* const q2, uint8x16_t* const q3) {
216 *q2 = vreinterpretq_u8_u32(row26.val[1]);
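The transposed loader that ends at line 216 rebuilds byte rows from halves that earlier transpose steps left interleaved at 32-bit granularity; the match shows *q2 coming out of row26.val[1]. A sketch of that final regroup step, assuming two byte vectors already hold rows 2 and 6 zipped together (variable names are not from the source):

#include <arm_neon.h>

/* Hedged sketch: zip at u32 width, then reinterpret back to bytes.
 * zipped.val[1] plays the role of row26.val[1] -> *q2 at line 216. */
static void ZipRows_sketch(const uint8x16_t a, const uint8x16_t b,
                           uint8x16_t* const lo, uint8x16_t* const hi) {
  const uint32x4x2_t zipped =
      vzipq_u32(vreinterpretq_u32_u8(a), vreinterpretq_u32_u8(b));
  *lo = vreinterpretq_u8_u32(zipped.val[0]);
  *hi = vreinterpretq_u8_u32(zipped.val[1]);
}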
319 const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
323 INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
325 INIT_VECTOR3(v1, vget_high_u8(q0), vget_high_u8(q1), vget_high_u8(q2));
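Lines 319-325 undo the u/v pairing on the store side: low halves go back to the u plane, high halves to v, with three outputs per edge side grouped by INIT_VECTOR3 into a uint8x8x3_t. One plausible way to write such a triple out is an interleaved vst3; the real helper may store lane by lane instead, so treat this as a sketch:

#include <arm_neon.h>

/* Hedged sketch: collect the u halves of the three q-side outputs and
 * store them interleaved (q0[i], q1[i], q2[i], ... -> 24 bytes). */
static void StoreQ3U_sketch(uint8_t* const u, const uint8x16_t q0,
                            const uint8x16_t q1, const uint8x16_t q2) {
  uint8x8x3_t t;
  t.val[0] = vget_low_u8(q0);
  t.val[1] = vget_low_u8(q1);
  t.val[2] = vget_low_u8(q2);
  vst3_u8(u, t);
}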
556 #define QRegs "q0", "q1", "q2", "q3", \
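QRegs at 556 apparently just concatenates the names of the NEON registers the assembly blocks overwrite, so each __asm__ statement can splice one macro into its clobber list instead of repeating a dozen strings. A minimal ARM32-only sketch of the pattern (the real macro's continuation also lists q8-q15):

/* Hedged sketch of a shared clobber-list macro. */
#define QREGS_SKETCH "q0", "q1", "q2", "q3"

static void ZeroQ0_sketch(void) {
  __asm__ volatile("vmov.i8 q0, #0 \n" ::: "memory", QREGS_SKETCH);
}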
609 "vld1.u8 {q2}, [%[p]], %[stride] \n" // p0
613 DO_FILTER2(q1, q2, q3, q12, %[thresh])
617 "vst1.u8 {q2}, [%[p]], %[stride] \n" // store op0
634 "vswp d5, d26 \n" // q0:q2 q1:q4
635 "vswp q2, q12 \n" // p1:q1 p0:q2 q0:q3 q1:q4
637 DO_FILTER2(q1, q2, q12, q13, %[thresh])
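The loads at 609-617 feed DO_FILTER2, VP8's "simple" in-place filter: only p1/p0/q0/q1 participate and only p0/q0 change. The vswp shuffles at 634-635 are apparently the horizontal variant rearranging transposed columns into that same register layout before reusing the macro at 637. A hedged intrinsics sketch of the arithmetic DO_FILTER2 performs (the threshold mask is omitted; helper names are illustrative):

#include <arm_neon.h>

/* Bias [0,255] pixels into int8 range so saturating ops do the clipping. */
static uint8x16_t Flip_sketch(const uint8x16_t v) {
  return veorq_u8(v, vdupq_n_u8(0x80));
}

/* a = 3*(q0-p0) + (p1-q1), then p0 += clamp(a+3)>>3, q0 -= clamp(a+4)>>3. */
static void SimpleFilter2_sketch(const uint8x16_t p1, uint8x16_t* const p0,
                                 uint8x16_t* const q0, const uint8x16_t q1) {
  const int8x16_t p1s = vreinterpretq_s8_u8(Flip_sketch(p1));
  const int8x16_t p0s = vreinterpretq_s8_u8(Flip_sketch(*p0));
  const int8x16_t q0s = vreinterpretq_s8_u8(Flip_sketch(*q0));
  const int8x16_t q1s = vreinterpretq_s8_u8(Flip_sketch(q1));
  const int8x16_t d = vqsubq_s8(q0s, p0s);            // q0 - p0
  int8x16_t a = vqaddq_s8(vqsubq_s8(p1s, q1s), d);    // (p1 - q1) + d
  a = vqaddq_s8(vqaddq_s8(a, d), d);                  // ... + 2*d more
  const int8x16_t f1 = vshrq_n_s8(vqaddq_s8(a, vdupq_n_s8(4)), 3);
  const int8x16_t f2 = vshrq_n_s8(vqaddq_s8(a, vdupq_n_s8(3)), 3);
  *p0 = Flip_sketch(vreinterpretq_u8_s8(vqaddq_s8(p0s, f2)));
  *q0 = Flip_sketch(vreinterpretq_u8_s8(vqsubq_s8(q0s, f1)));
}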
689 const uint8x16_t q2, const uint8x16_t q3,
695 const uint8x16_t a_q3_q2 = vabdq_u8(q3, q2); // abs(q3 - q2)
696 const uint8x16_t a_q2_q1 = vabdq_u8(q2, q1); // abs(q2 - q1)
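Lines 689-696 belong to the flatness test that gates the regular filters: each of the six inner neighbor differences must stay within ithresh (a further |p0-q0|*2 + |p1-q1|/2 <= thresh check also belongs to NeedsFilter2 but is omitted below). A hedged sketch of the mask construction:

#include <arm_neon.h>

/* 0xFF in every lane whose pixel column passes the inner-edge test. */
static uint8x16_t NeedsFilter2_sketch(
    const uint8x16_t p3, const uint8x16_t p2, const uint8x16_t p1,
    const uint8x16_t p0, const uint8x16_t q0, const uint8x16_t q1,
    const uint8x16_t q2, const uint8x16_t q3, int ithresh) {
  const uint8x16_t it = vdupq_n_u8((uint8_t)ithresh);
  uint8x16_t m = vabdq_u8(p3, p2);         // abs(p3 - p2)
  m = vmaxq_u8(m, vabdq_u8(p2, p1));       // abs(p2 - p1)
  m = vmaxq_u8(m, vabdq_u8(p1, p0));       // abs(p1 - p0)
  m = vmaxq_u8(m, vabdq_u8(q3, q2));       // abs(q3 - q2), cf. line 695
  m = vmaxq_u8(m, vabdq_u8(q2, q1));       // abs(q2 - q1), cf. line 696
  m = vmaxq_u8(m, vabdq_u8(q1, q0));       // abs(q1 - q0)
  return vcgeq_u8(it, m);                  // ithresh >= max of all six
}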
766 const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
797 *oq2 = FlipSignBack_NEON(vqsubq_s8(q2, a3)); // clip(q2 - a3)
803 const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
813 const int8x16_t q2s = FlipSign_NEON(q2);
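Lines 766-813 show the six-tap filter working on sign-flipped values: FlipSign_NEON biases [0,255] into [-128,127] so that saturating signed arithmetic is the clip(), and FlipSignBack_NEON undoes it, as in the clip(q2 - a3) at 797. A sketch of that pair around one tap (the derivation of a3 is elided):

#include <arm_neon.h>

/* Hedged sketch of lines 797/813: saturating subtract in the biased
 * domain == clip(), then un-bias. */
static uint8x16_t ClipQ2MinusA3_sketch(const uint8x16_t q2,
                                       const int8x16_t a3) {
  const uint8x16_t sign = vdupq_n_u8(0x80);
  const int8x16_t q2s = vreinterpretq_s8_u8(veorq_u8(q2, sign));  // FlipSign
  const int8x16_t r = vqsubq_s8(q2s, a3);           // clip(q2 - a3)
  return veorq_u8(vreinterpretq_u8_s8(r), sign);    // FlipSignBack
}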
839 uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
840 Load16x8_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
842 const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
846 DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
856 uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
857 Load8x16_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
859 const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
863 DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
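The drivers at 839-863 follow one pattern: load eight lines, build the NeedsFilter2 mask plus a high-edge-variance (hev) mask, run DoFilter6_NEON, store. The listing never shows how the mask gates the output, but the standard idiom is a per-lane bit-select, sketched here as an assumption:

#include <arm_neon.h>

/* mask ? filtered : original, lane by lane. */
static uint8x16_t MaskedUpdate_sketch(const uint8x16_t mask,
                                      const uint8x16_t filtered,
                                      const uint8x16_t original) {
  return vbslq_u8(mask, filtered, original);
}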
878 uint8x16_t q0, q1, q2, q3;
880 Load16x4_NEON(p + 2 * stride, stride, &q0, &q1, &q2, &q3);
883 NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
886 // re-used for next span. And q2/q3 will become p1/p0 accordingly.
889 p1 = q2;
902 uint8x16_t q0, q1, q2, q3;
904 Load4x16_NEON(p + 2, stride, &q0, &q1, &q2, &q3);
907 NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
911 p1 = q2;
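The comment at 886 and the assignments at 889/911 describe the inner-edge loop's register handoff: the rows just loaded as q0..q3 become the next span's p3..p0, so each iteration loads only four fresh lines. A schematic sketch of the vertical case (addressing simplified, filtering elided):

#include <arm_neon.h>

/* Hedged sketch of the span reuse at lines 886-911. */
static void InnerEdges_sketch(const uint8_t* p, int stride) {
  uint8x16_t p3 = vld1q_u8(p + 0 * stride);
  uint8x16_t p2 = vld1q_u8(p + 1 * stride);
  uint8x16_t p1 = vld1q_u8(p + 2 * stride);
  uint8x16_t p0 = vld1q_u8(p + 3 * stride);
  for (int k = 3; k != 0; --k) {
    p += 4 * stride;
    const uint8x16_t q0 = vld1q_u8(p + 0 * stride);
    const uint8x16_t q1 = vld1q_u8(p + 1 * stride);
    const uint8x16_t q2 = vld1q_u8(p + 2 * stride);
    const uint8x16_t q3 = vld1q_u8(p + 3 * stride);
    /* ... mask + filter of the p/q span would go here ... */
    p3 = q0; p2 = q1;
    p1 = q2; p0 = q3;  // cf. lines 889 and 911
  }
  (void)p3; (void)p2; (void)p1; (void)p0;  // consumed by the elided filter
}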
921 uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
922 Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
924 const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
928 DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
937 uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
940 Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
942 const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
954 uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
955 Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
957 const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
961 DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
969 uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
972 Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
974 const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
1059 "vld1.16 {q1, q2}, [%[in]] \n"
1072 "vqdmulh.s16 q8, q2, d0[0] \n"
1073 "vqdmulh.s16 q9, q2, d0[1] \n"
1092 "vqadd.s16 q8, q2, q8 \n"
1110 "vzip.16 q1, q2 \n"
1111 "vzip.16 q1, q2 \n"
1118 "vqdmulh.s16 q8, q2, d0[0] \n"
1119 "vqdmulh.s16 q9, q2, d0[1] \n"
1129 "vqadd.s16 q8, q2, q8 \n"
1160 "vzip.16 q1, q2 \n"
1161 "vzip.16 q1, q2 \n"
1168 "vqadd.s16 q2, q2, q9 \n"
1171 "vqmovun.s16 d1, q2 \n"
1180 : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" /* clobbered */