Searched refs:transpose (Results 1 - 23 of 23) sorted by relevance

/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/
H A Dshortfdct_neon.asm43 ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3]
72 ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
124 ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3]
165 ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/
H A Dvp9_iht4x4_add_neon.asm155 ; transpose the input data
172 ; transpose the matrix
188 ; transpose the matrix
203 ; transpose the matrix
H A Dvp9_iht8x8_add_neon.asm581 ; transpose the input data
597 ; transpose the matrix
615 ; transpose the matrix
633 ; transpose the matrix
H A Dvp9_idct32x32_add_neon.asm299 ; r8 transpose loop counter
320 ; internal buffer used to transpose 8 lines into before transforming them
323 ; results of the first pass (transpose and transform rows)
326 ; results of the second pass (transpose and transform columns)
355 mov r8, #2 ; initialize transpose loop counter
434 ; transpose pair loop processing
H A Dvp9_convolve8_avg_neon.asm154 ; transpose
H A Dvp9_convolve8_neon.asm146 ; transpose
H A Dvp9_loopfilter_neon.asm124 ;transpose to 8x16 matrix
165 ; necessary load, transpose (if necessary) and store. The function does not use
375 ;transpose to 8x16 matrix
427 ; necessary load, transpose (if necessary) and store. The function does not use
H A Dvp9_idct16x16_add_neon.asm71 ; transpose the input data
318 ; transpose the input data
821 ; transpose the input data
950 ; transpose the input data
H A Dvp9_mb_lpf_neon.asm286 ; necessary load, transpose (if necessary) and store.
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/x86/
H A Didctllm_sse2.asm126 ; note the transpose of xmm1 and xmm2, necessary for shuffle
198 ; transpose for the second pass
276 ; transpose to save
458 ; note the transpose of xmm1 and xmm2, necessary for shuffle
535 ; transpose for the second pass
613 ; transpose to save
H A Dloopfilter_sse2.asm952 ;transpose 16x8 to 8x16, and store the 8-line result on stack.
961 ; transpose and write back - only work on q1, q0, p0, p1
1017 ;transpose 16x8 to 8x16, and store the 8-line result on stack.
1026 ; transpose and write back - only work on q1, q0, p0, p1
1179 ; transpose and write back
1242 ; transpose and write back
1541 ; transpose back to write out
H A Dloopfilter_mmx.asm263 ;transpose
530 ; transpose and write back
948 ;transpose
1292 ; transpose and write back
1655 ; transpose back to write out
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/ppc/
H A Didctllm_altivec.asm73 ;# transpose input
128 ;# transpose output
H A Dloopfilter_filters_altivec.asm26 ;# We often need to perform transposes (and other transpose-like operations)
43 ;# A 16x16 transpose can then be thought of as an operation on
45 ;# memory and the effect of a transpose is to interchange address bit
89 ;# edges together. This requires a single 16x16 transpose, which, in
103 ;# For clarity, and because we can afford it, we do this transpose
137 ;# Whole transpose takes 4*16 = 64 instructions
173 ;# In other words, we transpose each of the four 4x4 submatrices.
216 ;# Normal mb vertical edge filter transpose.
247 ;# Inverse transpose is similar, except here I -> (I+3) mod 7 and the
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/x86/
H A Ddct_mmx.asm38 ; transpose for the first stage
114 ; transpose for the second stage
H A Ddct_sse2.asm181 ; transpose for the first stage
266 ; transpose for the second stage
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/
H A Dsixtappredict8x4_v6.asm23 ;note: In first pass, store the result in transpose(8linesx9columns) on stack. Temporary stack size is 184.
25 ;and the result is stored in transpose.
H A Dloopfilter_v6.asm610 ; transpose the source data for 4-in-parallel operation
635 ; transpose uses 8 regs(r6 - r12 and lr). Need to save reg value now
636 ; transpose the source data for 4-in-parallel operation
727 ;transpose r12, r11, r6, lr to r7, r8, r9, r10
811 ;we can use TRANSPOSE_MATRIX macro to transpose output - input: q1, q0, p0, p1
887 ; transpose the source data for 4-in-parallel operation
912 ; transpose uses 8 regs(r6 - r12 and lr). Need to save reg value now
913 ; transpose the source data for 4-in-parallel operation
1009 ;transpose r12, r11, r6, lr to p1, p0, q0, q1
H A Dsimpleloopfilter_v6.asm176 ;transpose r7, r8, r9, r10 to r3, r4, r5, r6
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/ppc/
H A Dfdct_altivec.asm77 ;# forward transform uses transpose.
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/x86/
H A Dvp9_loopfilter_intrin_sse2.c1533 static INLINE void transpose(unsigned char *src[], int in_p, function
1618 transpose(src, 16, dst, p, 2);
1634 transpose(src, p, dst, 8, 1);
1643 transpose(src, 8, dst, p, 1);
1669 transpose(src, 16, dst, p, 2);
1686 transpose(src, p, dst, 8, 2);
1697 transpose(src, 8, dst, p, 2);
H A Dvp9_loopfilter_mmx.asm263 ;transpose
530 ; transpose and write back
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/third_party/x86inc/
H A Dx86inc.asm792 ; efficient way to implement butterfly or transpose or dct without swapping some

Completed in 411 milliseconds