Lines Matching refs:q0

325                                   const __m128i* const q0,
330 const __m128i t_2 = MM_ABS(*q1, *q0);
334 const __m128i t_4 = _mm_subs_epu8(t_2, h); // abs(q1 - q0) - hev_thresh
343 const __m128i* const q0,
348 const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0); // q0 - p0
349 const __m128i s1 = _mm_adds_epi8(p1_q1, q0_p0); // p1 - q1 + 1 * (q0 - p0)
350 const __m128i s2 = _mm_adds_epi8(q0_p0, s1); // p1 - q1 + 2 * (q0 - p0)
351 const __m128i s3 = _mm_adds_epi8(q0_p0, s2); // p1 - q1 + 3 * (q0 - p0)
356 static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
365 *q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
388 const __m128i* const q0,
397 const __m128i t4 = MM_ABS(*p0, *q0); // abs(p0 - q0)
398 const __m128i t5 = _mm_adds_epu8(t4, t4); // abs(p0 - q0) * 2
399 const __m128i t6 = _mm_adds_epu8(t5, t3); // abs(p0-q0)*2 + abs(p1-q1)/2
408 // Applies filter on 2 pixels (p0 and q0)
410 __m128i* const q0, __m128i* const q1,
418 NeedsFilter(p1, p0, q0, q1, thresh, &mask);
420 FLIP_SIGN_BIT2(*p0, *q0);
421 GetBaseDelta(&p1s, p0, q0, &q1s, &a);
423 DoSimpleFilter(p0, q0, &a);
424 FLIP_SIGN_BIT2(*p0, *q0);
427 // Applies filter on 4 pixels (p1, p0, q0 and q1)
429 __m128i* const q0, __m128i* const q1,
438 GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
441 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
445 t2 = _mm_subs_epi8(*q0, *p0); // q0 - p0
446 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 1 * (q0 - p0)
447 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 2 * (q0 - p0)
448 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 3 * (q0 - p0)
453 t2 = _mm_adds_epi8(t1, t2); // 3 * (q0 - p0) + hev(p1 - q1) + 3
454 t3 = _mm_adds_epi8(t1, t3); // 3 * (q0 - p0) + hev(p1 - q1) + 4
455 SignedShift8b(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
456 SignedShift8b(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
458 *q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
459 FLIP_SIGN_BIT2(*p0, *q0);
472 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
474 __m128i* const p0, __m128i* const q0,
482 GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
484 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
486 GetBaseDelta(p1, p0, q0, q1, &a);
491 DoSimpleFilter(p0, q0, &f);
518 Update2Pixels(p0, q0, &a0_lo, &a0_hi);
570 __m128i* const q0, __m128i* const q1) {
584 // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
587 Load8x4(r0, stride, p1, q0);
591 t2 = *q0;
594 // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
598 *q0 = _mm_unpacklo_epi64(t2, *q1);
613 const __m128i* const q0,
625 // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
627 t1 = *q0;
632 // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
659 __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
662 DoFilter2(&p1, &p0, &q0, &q1, thresh);
666 _mm_storeu_si128((__m128i*)&p[0], q0);
670 __m128i p1, p0, q0, q1;
674 Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
675 DoFilter2(&p1, &p0, &q0, &q1, thresh);
676 Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
738 const __m128i* const q0,
746 NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
755 __m128i p2, p1, p0, q0, q1, q2;
761 // Load q0, q1, q2, q3
762 LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
763 MAX_DIFF2(t1, q2, q1, q0, mask);
765 ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
766 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
772 _mm_storeu_si128((__m128i*)&p[+0 * stride], q0);
780 __m128i p3, p2, p1, p0, q0, q1, q2, q3;
786 Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
787 MAX_DIFF2(q3, q2, q1, q0, mask);
789 ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
790 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
793 Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
841 p += 4; // beginning of q0 (and next span)
862 __m128i t1, p2, p1, p0, q0, q1, q2;
868 // Load q0, q1, q2, q3
869 LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
870 MAX_DIFF2(t1, q2, q1, q0, mask);
872 ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
873 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
879 STOREUV(q0, u, v, 0 * stride);
887 __m128i p3, p2, p1, p0, q0, q1, q2, q3;
894 Load16x4(u, v, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
895 MAX_DIFF2(q3, q2, q1, q0, mask);
897 ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
898 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
901 Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
907 __m128i t1, t2, p1, p0, q0, q1;
916 // Load q0, q1, q2, q3
917 LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
918 MAX_DIFF2(t2, t1, q1, q0, mask);
920 ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
921 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
926 STOREUV(q0, u, v, 0 * stride);
933 __m128i t1, t2, p1, p0, q0, q1;
937 u += 4; // beginning of q0
939 Load16x4(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
940 MAX_DIFF2(t2, t1, q1, q0, mask);
942 ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
943 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
947 Store16x4(&p1, &p0, &q0, &q1, u, v, stride);