/external/libavc/encoder/x86/ |
H A D | ime_distortion_metrics_sse42.c | 116 src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); 117 src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); 118 src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); 119 src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); 121 est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); 122 est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd)); 123 est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); 124 est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); 139 src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); 140 src_r1 = _mm_loadu_si128((__m128 [all...] |
H A D | ih264e_half_pel_ssse3.c | 132 src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 133 src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 283 src1_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 287 src1_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 291 src1_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 295 src1_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 299 src1_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 305 src1_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 393 src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1)); 394 src_r1_8x16b = _mm_loadu_si128((__m128 [all...] |
H A D | ih264e_intra_modes_eval_ssse3.c | 190 src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); 191 src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); 192 src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); 193 src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); 214 src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); 215 src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); 216 src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); 217 src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); 244 val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17)); 246 src1_16x8b = _mm_loadu_si128((__m128 [all...] |
/external/skia/src/opts/ |
H A D | SkBlitRow_opts_SSE4.cpp | 28 __m128i s0 = _mm_loadu_si128(src4+i+0), 29 s1 = _mm_loadu_si128(src4+i+1), 30 s2 = _mm_loadu_si128(src4+i+2), 31 s3 = _mm_loadu_si128(src4+i+3); 49 _mm_storeu_si128(dst4+i+0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(dst4+i+0))); 50 _mm_storeu_si128(dst4+i+1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(dst4+i+1))); 51 _mm_storeu_si128(dst4+i+2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(dst4+i+2))); 52 _mm_storeu_si128(dst4+i+3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(dst4+i+3)));
|
H A D | SkUtils_opts_SSE2.cpp | 81 __m128i a = _mm_loadu_si128(src128++); 82 __m128i b = _mm_loadu_si128(src128++); 83 __m128i c = _mm_loadu_si128(src128++); 84 __m128i d = _mm_loadu_si128(src128++);
|
/external/libhevc/common/x86/ |
H A D | ihevc_32x32_itrans_recon_sse42_intr.c | 251 m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 253 m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 255 m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 257 m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 259 m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 261 m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 263 m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 265 m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 268 m_temp_reg_80 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 270 m_temp_reg_81 = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_16x16_itrans_recon_sse42_intr.c | 204 m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 206 m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 208 m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 210 m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 212 m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 214 m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 216 m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 218 m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_tmp_src); 243 m_coeff1 = _mm_loadu_si128((__m128i *)&g_ai2_ihevc_trans_16_even[2][0]); //89 75 289 m_coeff3 = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_intra_pred_filters_sse42_intr.c | 153 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 162 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 163 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 174 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 175 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 176 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); 177 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 48)); 189 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 190 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 191 src_temp3 = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_intra_pred_filters_ssse3_intr.c | 437 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 446 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 447 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 458 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 459 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 460 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); 461 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 48)); 473 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 474 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 475 src_temp3 = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_weighted_pred_sse42_intr.c | 152 src_temp0_4x32b = _mm_loadu_si128((__m128i *)(pi2_src)); 154 src_temp1_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd)); 156 src_temp2_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 2 * src_strd)); 158 src_temp3_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 3 * src_strd)); 161 src_temp4_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 4)); 163 src_temp5_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd + 4)); 165 src_temp6_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 2 * src_strd + 4)); 167 src_temp7_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 3 * src_strd + 4)); 277 src_temp0_4x32b = _mm_loadu_si128((__m128i *)(pi2_src)); 279 src_temp1_4x32b = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_padding_ssse3_intr.c | 109 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 180 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 250 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1)); 323 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2));
|
H A D | ihevc_chroma_intra_pred_filters_ssse3_intr.c | 229 src_temp_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (two_nt + 1) + col)); 344 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt))); 345 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16)); 346 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 32)); 347 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 48)); 388 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt))); 389 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16)); 419 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt))); 705 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 0)); 722 temp1 = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_weighted_pred_ssse3_intr.c | 165 src_temp0_8x16b = _mm_loadu_si128((__m128i *)(pi2_src)); 167 src_temp1_8x16b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd)); 169 src_temp2_8x16b = _mm_loadu_si128((__m128i *)(pi2_src + 2 * src_strd)); 171 src_temp3_8x16b = _mm_loadu_si128((__m128i *)(pi2_src + 3 * src_strd)); 457 src_temp0_8x16b = _mm_loadu_si128((__m128i *)(pi2_src)); 459 src_temp1_8x16b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd)); 461 src_temp2_8x16b = _mm_loadu_si128((__m128i *)(pi2_src + 8)); 463 src_temp3_8x16b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd + 8)); 551 src_temp0_8x16b = _mm_loadu_si128((__m128i *)(pi2_src)); 553 src_temp1_8x16b = _mm_loadu_si128((__m128 [all...] |
H A D | ihevc_itrans_recon_sse42_intr.c | 153 m_coeff3 = _mm_loadu_si128((__m128i *)&g_ai4_ihevc_trans_4_ttype1[2][0]); //74 201 m_coeff1 = _mm_loadu_si128((__m128i *)&g_ai4_ihevc_trans_4_ttype1[1][0]); //29 202 m_coeff2 = _mm_loadu_si128((__m128i *)&g_ai4_ihevc_trans_4_ttype1[0][0]); //55 499 m_coeff1 = _mm_loadu_si128((__m128i *)&g_ai4_ihevc_trans_4_ttype0[0][0]); //36 500 m_coeff3 = _mm_loadu_si128((__m128i *)&g_ai4_ihevc_trans_4_ttype0[2][0]); //83 856 m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_src); 858 m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_src); 860 m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_src); 862 m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_src); 865 m_temp_reg_74 = _mm_loadu_si128((__m128 [all...] |
/external/libmpeg2/common/x86/ |
H A D | impeg2_inter_pred_sse42_intr.c | 84 src_r0 = _mm_loadu_si128((__m128i *) (src)); 85 src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); 86 src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); 87 src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); 97 src_r0 = _mm_loadu_si128((__m128i *) (src)); 98 src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); 99 src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); 100 src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); 110 src_r0 = _mm_loadu_si128((__m128i *) (src)); 111 src_r1 = _mm_loadu_si128((__m128 [all...] |
H A D | impeg2_idct_recon_sse42_intr.c | 194 m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_src); 196 m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_src); 198 m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_src); 200 m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_src); 203 m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_src); 205 m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_src); 207 m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_src); 209 m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_src); 220 m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]); 221 m_coeff1 = _mm_loadu_si128((__m128 [all...] |
/external/libhevc/decoder/x86/ |
H A D | ihevcd_fmt_conv_ssse3_intr.c | 124 alt_first_mask = _mm_loadu_si128((__m128i *)&FIRST_ALT_SHUFFLE[0]); 147 src_uv0_8x16b = _mm_loadu_si128((__m128i *)pu1_uv_src_temp); 148 src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + 16)); 160 src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (1 * src_strd))); 161 src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (1 * src_strd) + 16)); 173 src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (2 * src_strd))); 174 src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (2 * src_strd) + 16)); 186 src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (3 * src_strd))); 187 src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (3 * src_strd) + 16)); 222 src_uv0_8x16b = _mm_loadu_si128((__m128 [all...] |
/external/libvpx/libvpx/vp9/common/x86/ |
H A D | vp9_subpixel_8t_intrin_avx2.c | 72 filtersReg = _mm_loadu_si128((__m128i *)filter); 107 _mm_loadu_si128((__m128i *)(src_ptr-3))); 109 _mm_loadu_si128((__m128i *) 138 _mm_loadu_si128((__m128i *)(src_ptr+5))); 140 _mm_loadu_si128((__m128i *) 205 srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); 240 srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); 317 filtersReg = _mm_loadu_si128((__m128i *)filter); 347 _mm_loadu_si128((__m128i *)(src_ptr))); 349 _mm_loadu_si128((__m128 [all...] |
H A D | vp9_subpixel_8t_intrin_ssse3.c | 54 filtersReg = _mm_loadu_si128((__m128i *)filter); 75 srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); 126 filtersReg = _mm_loadu_si128((__m128i *)filter); 150 srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); 206 filtersReg = _mm_loadu_si128((__m128i *)filter); 230 srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); 257 srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); 322 filtersReg = _mm_loadu_si128((__m128i *)filter); 400 filtersReg = _mm_loadu_si128((__m128i *)filter); 416 srcRegFilt1 = _mm_loadu_si128((__m128 [all...] |
/external/libavc/common/x86/ |
H A D | ih264_inter_pred_filters_ssse3.c | 165 y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 166 y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); 167 y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd2)); 168 y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd3)); 169 y_4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd4)); 170 y_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd5)); 171 y_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd6)); 172 y_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd7)); 252 src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 253 src_r1_16x8b = _mm_loadu_si128((__m128 [all...] |
H A D | ih264_weighted_pred_sse42.c | 170 y0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1); 171 y0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1)); 172 y0_2_16x8b = _mm_loadu_si128( 174 y0_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 3)); 175 y0_4_16x8b = _mm_loadu_si128( 177 y0_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 5)); 178 y0_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 6)); 179 y0_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 7)); 181 y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2); 182 y1_1_16x8b = _mm_loadu_si128((__m128 [all...] |
H A D | ih264_iquant_itrans_recon_ssse3.c | 130 src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row 131 src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row 132 scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row 133 scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row 134 dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits 135 dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits 412 src_r0 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a04 a05 a06 a07 -- the source matrix 0th row 413 scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat)); //b00 b01 b02 b03 b04 b05 b06 b07 -- the scaling matrix 0th row 414 dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits 435 src_r0 = _mm_loadu_si128((__m128 [all...] |
/external/webp/src/dsp/ |
H A D | alpha_processing_sse2.c | 41 const __m128i a0 = _mm_loadu_si128(src + 0); 42 const __m128i a1 = _mm_loadu_si128(src + 1);
|
H A D | lossless_sse2.c | 163 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); 179 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); 218 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); 257 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); 294 const __m128i bgra0 = _mm_loadu_si128(in++); // bgra0|bgra1|bgra2|bgra3 295 const __m128i bgra4 = _mm_loadu_si128(in++); // bgra4|bgra5|bgra6|bgra7 323 const __m128i bgra0 = _mm_loadu_si128(in++); // bgra0|bgra1|bgra2|bgra3 324 const __m128i bgra4 = _mm_loadu_si128(in++); // bgra4|bgra5|bgra6|bgra7 358 const __m128i bgra0 = _mm_loadu_si128(in++); // bgra0|bgra1|bgra2|bgra3 359 const __m128i bgra4 = _mm_loadu_si128(i [all...] |
H A D | enc_sse2.c | 71 const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]); 72 const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]); 469 const __m128i src0 = _mm_loadu_si128((__m128i*)&tmp[0]); 470 const __m128i src1 = _mm_loadu_si128((__m128i*)&tmp[4]); 471 const __m128i src2 = _mm_loadu_si128((__m128i*)&tmp[8]); 472 const __m128i src3 = _mm_loadu_si128((__m128i*)&tmp[12]); 500 const __m128i a0 = _mm_loadu_si128((__m128i*)&a[BPS * 0]); 501 const __m128i a1 = _mm_loadu_si128((__m128i*)&a[BPS * 1]); 502 const __m128i a2 = _mm_loadu_si128((__m128i*)&a[BPS * 2]); 503 const __m128i a3 = _mm_loadu_si128((__m128 [all...] |