/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
H A D | shortfdct_neon.asm | 43 ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] 72 ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] 124 ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3] 165 ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
H A D | vp9_iht4x4_add_neon.asm | 155 ; transpose the input data 172 ; transpose the matrix 188 ; transpose the matrix 203 ; transpose the matrix
|
H A D | vp9_iht8x8_add_neon.asm | 581 ; transpose the input data 597 ; transpose the matrix 615 ; transpose the matrix 633 ; transpose the matrix
|
H A D | vp9_idct32x32_add_neon.asm | 299 ; r8 transpose loop counter 320 ; internal buffer used to transpose 8 lines into before transforming them 323 ; results of the first pass (transpose and transform rows) 326 ; results of the second pass (transpose and transform columns) 355 mov r8, #2 ; initialize transpose loop counter 434 ; transpose pair loop processing
|
H A D | vp9_convolve8_avg_neon.asm | 154 ; transpose
|
H A D | vp9_convolve8_neon.asm | 146 ; transpose
|
H A D | vp9_loopfilter_neon.asm | 124 ;transpose to 8x16 matrix 165 ; necessary load, transpose (if necessary) and store. The function does not use 375 ;transpose to 8x16 matrix 427 ; necessary load, transpose (if necessary) and store. The function does not use
|
H A D | vp9_idct16x16_add_neon.asm | 71 ; transpose the input data 318 ; transpose the input data 821 ; transpose the input data 950 ; transpose the input data
|
H A D | vp9_mb_lpf_neon.asm | 286 ; necessary load, transpose (if necessary) and store.
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/x86/ |
H A D | idctllm_sse2.asm | 126 ; note the transpose of xmm1 and xmm2, necessary for shuffle 198 ; transpose for the second pass 276 ; transpose to save 458 ; note the transpose of xmm1 and xmm2, necessary for shuffle 535 ; transpose for the second pass 613 ; transpose to save
|
H A D | loopfilter_sse2.asm | 952 ;transpose 16x8 to 8x16, and store the 8-line result on stack. 961 ; transpose and write back - only work on q1, q0, p0, p1 1017 ;transpose 16x8 to 8x16, and store the 8-line result on stack. 1026 ; transpose and write back - only work on q1, q0, p0, p1 1179 ; transpose and write back 1242 ; transpose and write back 1541 ; transpose back to write out
|
H A D | loopfilter_mmx.asm | 263 ;transpose 530 ; transpose and write back 948 ;transpose 1292 ; transpose and write back 1655 ; transpose back to write out
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/ppc/ |
H A D | idctllm_altivec.asm | 73 ;# transpose input 128 ;# transpose output
|
H A D | loopfilter_filters_altivec.asm | 26 ;# We often need to perform transposes (and other transpose-like operations) 43 ;# A 16x16 transpose can then be thought of as an operation on 45 ;# memory and the effect of a transpose is to interchange address bit 89 ;# edges together. This requires a single 16x16 transpose, which, in 103 ;# For clarity, and because we can afford it, we do this transpose 137 ;# Whole transpose takes 4*16 = 64 instructions 173 ;# In other words, we transpose each of the four 4x4 submatrices. 216 ;# Normal mb vertical edge filter transpose. 247 ;# Inverse transpose is similar, except here I -> (I+3) mod 7 and the
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/x86/ |
H A D | dct_mmx.asm | 38 ; transpose for the first stage 114 ; transpose for the second stage
|
H A D | dct_sse2.asm | 181 ; transpose for the first stage 266 ; transpose for the second stage
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
H A D | sixtappredict8x4_v6.asm | 23 ;note: In first pass, store the result in transpose(8linesx9columns) on stack. Temporary stack size is 184. 25 ;and the result is stored in transpose.
|
H A D | loopfilter_v6.asm | 610 ; transpose the source data for 4-in-parallel operation 635 ; transpose uses 8 regs(r6 - r12 and lr). Need to save reg value now 636 ; transpose the source data for 4-in-parallel operation 727 ;transpose r12, r11, r6, lr to r7, r8, r9, r10 811 ;we can use TRANSPOSE_MATRIX macro to transpose output - input: q1, q0, p0, p1 887 ; transpose the source data for 4-in-parallel operation 912 ; transpose uses 8 regs(r6 - r12 and lr). Need to save reg value now 913 ; transpose the source data for 4-in-parallel operation 1009 ;transpose r12, r11, r6, lr to p1, p0, q0, q1
|
H A D | simpleloopfilter_v6.asm | 176 ;transpose r7, r8, r9, r10 to r3, r4, r5, r6
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/ppc/ |
H A D | fdct_altivec.asm | 77 ;# forward transform uses transpose.
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/x86/ |
H A D | vp9_loopfilter_intrin_sse2.c | 1533 static INLINE void transpose(unsigned char *src[], int in_p, function 1618 transpose(src, 16, dst, p, 2); 1634 transpose(src, p, dst, 8, 1); 1643 transpose(src, 8, dst, p, 1); 1669 transpose(src, 16, dst, p, 2); 1686 transpose(src, p, dst, 8, 2); 1697 transpose(src, 8, dst, p, 2);
|
H A D | vp9_loopfilter_mmx.asm | 263 ;transpose 530 ; transpose and write back
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/third_party/x86inc/ |
H A D | x86inc.asm | 792 ; efficient way to implement butterfly or transpose or dct without swapping some
|