Lines Matching refs:src_temp4

309     __m128i src_temp1, src_temp3, src_temp4, src_temp5, src_temp6, m_mask;
345 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16));
350 src_temp6 = _mm_unpacklo_epi8(src_temp4, m_zero);
355 src_temp4 = _mm_srli_si128(src_temp4, 8);
360 src_temp4 = _mm_unpacklo_epi8(src_temp4, m_zero);
364 src_temp4 = _mm_add_epi16(src_temp4, src_temp6);
369 src_temp4 = _mm_add_epi16(src_temp4, src_temp6);
372 src_temp4 = _mm_add_epi16(src_temp4, src_temp8);
373 src_temp4 = _mm_shuffle_epi8(src_temp4, m_mask);
374 src_temp4 = _mm_hadd_epi16(src_temp4, m_zero);
375 src_temp4 = _mm_hadd_epi16(src_temp4, m_zero);
377 sign_8x16b = _mm_cmpgt_epi16(m_zero, src_temp4);
378 src_temp4 = _mm_unpacklo_epi16(src_temp4, sign_8x16b);
380 temp_sad = _mm_srli_si128(src_temp4, 4); /* Next 32 bits */
381 acc_dc_u = _mm_cvtsi128_si32(src_temp4);
389 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16));
392 src_temp6 = _mm_unpacklo_epi8(src_temp4, m_zero);
395 src_temp4 = _mm_srli_si128(src_temp4, 8);
398 src_temp4 = _mm_unpacklo_epi8(src_temp4, m_zero);
400 src_temp4 = _mm_add_epi16(src_temp4, src_temp6);
403 src_temp4 = _mm_add_epi16(src_temp4, src_temp6);
404 src_temp4 = _mm_shuffle_epi8(src_temp4, m_mask);
405 src_temp4 = _mm_hadd_epi16(src_temp4, m_zero);
406 src_temp4 = _mm_hadd_epi16(src_temp4, m_zero);
408 sign_8x16b = _mm_cmpgt_epi16(m_zero, src_temp4);
409 src_temp4 = _mm_unpacklo_epi16(src_temp4, sign_8x16b);
411 temp_sad = _mm_srli_si128(src_temp4, 4); /* Next 32 bits */
412 acc_dc_u = _mm_cvtsi128_si32(src_temp4);
422 src_temp4 = _mm_srli_si128(src_temp3, 8);
424 src_temp4 = _mm_unpacklo_epi8(src_temp4, m_zero);
426 src_temp4 = _mm_add_epi16(src_temp4, src_temp5);
428 src_temp4 = _mm_shuffle_epi8(src_temp4, m_mask);
429 src_temp4 = _mm_hadd_epi16(src_temp4, m_zero);
430 src_temp4 = _mm_hadd_epi16(src_temp4, m_zero);
432 sign_8x16b = _mm_cmpgt_epi16(m_zero, src_temp4);
433 src_temp4 = _mm_unpacklo_epi16(src_temp4, sign_8x16b);
435 temp_sad = _mm_srli_si128(src_temp4, 4); /* Next 32 bits */
436 acc_dc_u = _mm_cvtsi128_si32(src_temp4);
824 __m128i src_temp1, src_temp2, src_temp3, src_temp4, src_temp5, src_temp6, src_temp7, src_temp8, sm2, sm3;
840 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 3 - 8 - 2));
845 _mm_storel_epi64((__m128i *)(pu1_dst + (3 * dst_strd)), _mm_shuffle_epi8(src_temp4, sm2));
854 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 3 - 16 - 2));
863 _mm_storeu_si128((__m128i *)(pu1_dst + (3 * dst_strd)), _mm_shuffle_epi8(src_temp4, sm3));
880 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 3) - (col + 16) - 2));
889 _mm_storeu_si128((__m128i *)(pu1_dst + col + ((row + 3) * dst_strd)), _mm_shuffle_epi8(src_temp4, sm3));
946 __m128i src_temp1, src_temp2, src_temp3, src_temp4, src_temp5, src_temp6, src_temp7, src_temp8;
957 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
962 _mm_storel_epi64((__m128i *)(pu1_dst + (3 * dst_strd)), src_temp4);
971 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
980 _mm_storeu_si128((__m128i *)(pu1_dst + (3 * dst_strd)), src_temp4);
1000 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1009 _mm_storeu_si128((__m128i *)(pu1_dst + 0 + (3 * dst_strd)), src_temp4);
1043 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
1048 _mm_storel_epi64((__m128i *)(pu1_dst + (3 * dst_strd)), src_temp4);
1058 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
1067 _mm_storeu_si128((__m128i *)(pu1_dst + (3 * dst_strd)), src_temp4);
1087 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1096 _mm_storeu_si128((__m128i *)(pu1_dst + 0 + (3 * dst_strd)), src_temp4);