/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
H A D | dequant_idct_neon.c | 24 int16x8_t q1, q2, q3, q4, q5, q6; local 35 q4 = vld1q_s16(input); 55 q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), 64 q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); 67 q4 = vshrq_n_s16(q4, 1); 70 q4 = vqaddq_s16(q4, q2); 72 d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); 73 d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); [all...] |
H A D | idct_dequant_full_2x_neon.asm | 28 vld1.16 {q4, q5}, [r0] ; r q 46 vmul.i16 q4, q4, q0 52 ; q4: l4r4 q5: l12r12 61 vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 63 vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 80 ; q4: 4 + 4 * cospi : d1/temp1 82 vqadd.s16 q4, q4, q8 88 vqadd.s16 q3, q4, q [all...] |
H A D | sixtappredict4x4_neon.asm | 62 vld1.u8 {q4}, [r0], r1 88 vmov q4, q3 ;keep original src data in q4 q6 93 vshr.u64 q9, q4, #8 ;construct src_ptr[-1] 100 vshr.u64 q3, q4, #32 ;construct src_ptr[2] 107 vshr.u64 q9, q4, #16 ;construct src_ptr[0] 114 vshr.u64 q3, q4, #24 ;construct src_ptr[1] 125 vld1.u8 {q4}, [r0], r1 154 vmov q4, q3 ;keep original src data in q4 q [all...] |
H A D | shortidct4x4llm_neon.asm | 48 vqdmulh.s16 q4, q2, d0[0] 54 vshr.s16 q4, q4, #1 57 vqadd.s16 q4, q4, q2 80 vqdmulh.s16 q4, q2, d0[0] 86 vshr.s16 q4, q4, #1 89 vqadd.s16 q4, q4, q [all...] |
H A D | vp8_subpixelvariance16x16s_neon.asm | 57 vext.8 q5, q4, q5, #1 63 vrhadd.u8 q2, q4, q5 66 vsubl.u8 q4, d0, d22 ;diff 75 vpadal.s16 q8, q4 ;sum 147 vld1.u8 {q4}, [r0], r1 155 vrhadd.u8 q2, q2, q4 156 vrhadd.u8 q4, q4, q6 255 vext.8 q5, q4, q5, #1 260 vrhadd.u8 q2, q4, q [all...] |
H A D | buildintrapredictorsmby_neon.asm | 62 vpaddl.u32 q4, q3 244 vmull.u8 q4, d16, d0 247 vsub.s16 q4, q4, q7 264 vqadd.s16 q8, q0, q4 267 vqadd.s16 q10, q1, q4 270 vqadd.s16 q12, q2, q4 273 vqadd.s16 q14, q3, q4 345 vpaddl.u32 q4, q3 527 vmull.u8 q4, d1 [all...] |
H A D | sixtappredict8x8_neon.asm | 72 vld1.u8 {q4}, [r0], r1 134 vmull.u8 q4, d29, d3 141 vqadd.s16 q8, q4 153 vld1.u8 {q4}, [r0], r1 164 ;vld1.u8 {q4}, [r0], r1 230 vmull.u8 q4, d28, d3 236 vqadd.s16 q9, q4 273 vmull.u8 q4, d19, d0 278 vmlsl.u8 q4, d20, d1 283 vmlsl.u8 q4, d2 [all...] |
H A D | sixtappredict8x4_neon.asm | 69 vld1.u8 {q4}, [r0], r1 130 vmull.u8 q4, d29, d3 135 vqadd.s16 q8, q4 146 vld1.u8 {q4}, [r0], r1 217 vmull.u8 q4, d28, d3 223 vqadd.s16 q9, q4 255 vmull.u8 q4, d23, d0 260 vmlsl.u8 q4, d24, d1 265 vmlsl.u8 q4, d27, d4 270 vmlal.u8 q4, d2 [all...] |
H A D | vp8_subpixelvariance8x8_neon.asm | 47 vld1.u8 {q4}, [r0], r1 70 vld1.u8 {q4}, [r0], r1 116 vmull.u8 q4, d25, d0 125 vmlal.u8 q4, d26, d1 134 vqrshrn.u16 d25, q4, #7 170 vsubl.u8 q4, d22, d0 ;calculate diff 176 vpadal.s16 q8, q4 ;sum
|
H A D | sixtappredict16x16_neon.asm | 168 vmull.u8 q4, d28, d3 ;(src_ptr[1] * vp8_filter[3]) 172 vqadd.s16 q8, q4 ;sum of all (src_data*filter_parameters) 233 vmull.u8 q4, d19, d0 238 vmlsl.u8 q4, d20, d1 243 vmlsl.u8 q4, d23, d4 248 vmlal.u8 q4, d21, d2 253 vmlal.u8 q4, d24, d5 265 vqadd.s16 q8, q4 383 vst1.u8 {q4}, [r4], r5 423 vmull.u8 q4, d1 [all...] |
H A D | vp8_subpixelvariance16x16_neon.asm | 149 vmull.u8 q4, d15, d0 161 vmlal.u8 q4, d15, d1 172 vqrshrn.u16 d19, q4, #7 203 vmull.u8 q4, d25, d0 212 vmlal.u8 q4, d27, d1 223 vqrshrn.u16 d5, q4, #7 330 vmull.u8 q4, d25, d0 339 vmlal.u8 q4, d27, d1 348 vqrshrn.u16 d5, q4, #7
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
H A D | vp9_idct32x32_add_neon.asm | 171 ; q4-q7 contain the results (out[j * 32 + 0-31]) 183 vrshr.s16 q4, q4, #6 188 vaddw.u8 q4, q4, d4 193 vqmovun.s16 d4, q4 205 ; q4-q7 contain the results (out[j * 32 + 0-31]) 217 vrshr.s16 q4, q4, #6 222 vaddw.u8 q4, q [all...] |
H A D | vp9_idct16x16_add_neon.asm | 206 vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]; 207 vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]; 251 vadd.s16 q11, q3, q4 ; step2[3] = step1[3] + step1[4]; 252 vsub.s16 q12, q3, q4 ; step2[4] = step1[3] - step1[4]; 340 vmull.s16 q4, d17, d13 348 vmlal.s16 q4, d31, d12 359 vqrshrn.s32 d15, q4, #14 ; >> 14 375 vmull.s16 q4, d24, d31 383 vmlal.s16 q4, d2 [all...] |
H A D | vp9_iht8x8_add_neon.asm | 241 vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5] 242 vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5] 274 vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4]; 275 vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4]; 296 vmull.s16 q4, d31, d15 307 vmlsl.s16 q4, d17, d14 342 vadd.s32 q15, q4, q8 346 vsub.s32 q4, q4, q [all...] |
H A D | vp9_mb_lpf_neon.asm | 52 vld1.u8 {d12}, [r8@64], r1 ; q4 161 vtrn.32 q4, q6 166 vtrn.16 q4, q5 304 ; d12 q4 365 ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) 367 vabd.u8 d23, d12, d8 ; abs(q4 - q0) 376 vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) 533 vaddw.u8 q15, d12 ; op2 += q4 561 vaddl.u8 q4, d10, d15 568 vadd.i16 q15, q4 [all...] |
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
H A D | fastquantizeb_neon.asm | 27 vstmdb sp!, {q4-q7} 37 vabs.s16 q4, q0 ; calculate x = abs(z) 49 vadd.s16 q4, q6 ; x + Round 54 vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16 63 veor.s16 q4, q2 ; y^sz 74 vshr.s16 q4, #1 ; right shift 1 after vqdmulh 79 vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) 90 vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1 98 vmul.s16 q2, q6, q4 ; x * Dequant 112 vtst.16 q14, q4, q [all...] |
H A D | vp8_memcpy_neon.asm | 36 vld1.8 {q4, q5}, [r1]! 39 vst1.8 {q4, q5}, [r0]!
|
/hardware/samsung_slsi/exynos5/libswconverter/ |
H A D | csc_ARGB8888_to_YUV420SP_NEON.s | 38 @q4: B 78 vand.u16 q4,#0x00FF @R 84 vmls.u16 q8,q4,q11 @q0:U -(38 * R[k]) @128<<6+ 32 + u>>2 89 vmla.u16 q7,q4,q13 @112 * R[k] 106 vmul.u16 q7,q4,q14 @q0 = 66 *R[k] 120 vshr.u16 q4,q4,#8 @R 124 vmul.u16 q0,q4,q14 @q0 = 66 *R[k] 151 vand.u16 q4,#0x00FF @R 157 vmul.u16 q7,q4,q1 [all...] |
H A D | csc_interleave_memcpy_neon.s | 78 vld1.8 {q4}, [r11]! 95 vst2.8 {q4, q5}, [r10]!
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/mips/dspr2/ |
H A D | vp9_loopfilter_masks_dspr2.h | 286 uint32_t q3, uint32_t q4, 301 /* flat |= (abs(q4 - q0) > thresh) */ 302 "subu_s.qb %[c], %[q4], %[q0] \n\t" 303 "subu_s.qb %[r_k], %[q0], %[q4] \n\t" 362 [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4), 282 vp9_flatmask5(uint32_t p4, uint32_t p3, uint32_t p2, uint32_t p1, uint32_t p0, uint32_t q0, uint32_t q1, uint32_t q2, uint32_t q3, uint32_t q4, uint32_t *flat2) argument
|
H A D | vp9_loopfilter_macros_dspr2.h | 373 "preceu.ph.qbl %[q4_l], %[q4] \n\t" \ 383 [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6), [q7] "r" (q7) \ 413 "preceu.ph.qbr %[q4_r], %[q4] \n\t" \ 423 [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6), [q7] "r" (q7) \ 454 "precr.qb.ph %[q4], %[q4_l], %[q4_r] \n\t" \ 460 [q3] "=&r" (q3),[q4] "=&r" (q4), \
|
H A D | vp9_loopfilter_filters_dspr2.h | 537 const uint32_t q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; local 545 /* addition of p6,p5,p4,p3,p2,p1,p0,q0,q1,q2,q3,q4,q5,q6 557 "addu.ph %[add_p6toq6], %[add_p6toq6], %[q4] \n\t" 566 [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6), 580 "subu.ph %[res_op6], %[res_op6], %[q4] \n\t" 594 "subu.ph %[res_op5], %[res_op5], %[q4] \n\t" 606 "subu.ph %[res_op4], %[res_op4], %[q4] \n\t" 616 "subu.ph %[res_op3], %[res_op3], %[q4] \n\t" 622 p0 + q0 + q1 + q2 + q3 + q4, [all...] |
H A D | vp9_mblpf_vert_loopfilter_dspr2.c | 34 uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; local 91 "lw %[q4], +4(%[s4]) \n\t" 95 [q5] "=&r" (q5), [q4] "=&r" (q4) 213 /* transpose q4, q5, q6, q7 219 q4 q4_0 q4_1 q4_2 q4_3 226 q4 q4_0 q5_0 q26_0 q7_0 231 "precrq.qb.ph %[prim3], %[q5], %[q4] \n\t" 232 "precr.qb.ph %[prim4], %[q5], %[q4] \n\t" 235 "precr.qb.ph %[q4], [all...] |
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/ |
H A D | vp9_loopfilter_filters.c | 57 uint8_t q3, uint8_t q4) { 60 mask |= (abs(q4 - q0) > thresh) * -1; 246 q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; local 258 q0 + q1 + q2 + q3 + q4, 4); 260 q0 + q1 + q2 + q3 + q4 + q5, 4); 262 q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); 264 q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); 266 q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); 268 q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); 270 q0 + q1 + q2 + q3 * 2 + q4 52 flat_mask5(uint8_t thresh, uint8_t p4, uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3, uint8_t q4) argument [all...] |
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
H A D | vp8_vpxyv12_copysrcframe_func_neon.asm | 54 vld1.8 {q4, q5}, [r10]! 64 vst1.8 {q4, q5}, [r11]! 168 vld1.8 {q4, q5}, [r10]! 174 vst1.8 {q4, q5}, [r11]!
|