/* omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S revision 0c1bc742181ded4930842b46e9507372f0b1b963 */
/*
 * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
 *
 */

/*
 * Assembler set-up for this file (GNU as, ARM target).
 *
 * EABI build attributes 24 and 25 are Tag_ABI_align_needed and
 * Tag_ABI_align_preserved in the ARM EABI attribute numbering; both are set
 * to 1 here (code needs, and preserves, 8-byte stack alignment).
 */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm                /* A32 (ARM state) instruction encodings     */
    .fpu neon           /* enable Advanced SIMD (NEON) mnemonics     */
    .text               /* everything below goes in the code section */

/*----------------------------------------------------------------------------
 * omxVCM4P10_DequantTransformResidualFromPairAndAdd
 *
 * Builds a 4x4 block of 16-bit residuals, adds it to a 4x4 block of unsigned
 * 8-bit prediction samples and stores the sum, saturated to unsigned 8 bits.
 *
 * The residual comes from one of two paths, selected by the 4th stacked
 * argument:
 *   - non-zero: armVCM4P10_UnpackBlock4x4 fills a 32-byte scratch buffer with
 *     sixteen 16-bit coefficients, which are multiplied by per-position
 *     scale factors taken from armVCM4P10_VMatrixU16 (selected through
 *     armVCM4P10_QPModuloTable / armVCM4P10_QPDivTable), optionally get
 *     element 0 replaced by the value at r2, and then go through two
 *     transpose + butterfly passes followed by a rounding shift right by 6.
 *   - zero: every residual is ((signed 16-bit value at r2) + 32) >> 6.
 *
 * ABI:  ARM (A32) procedure call standard, NEON required.
 *
 * In:   r0         handed unchanged to armVCM4P10_UnpackBlock4x4 as its
 *                  first argument (only used on the non-zero path)
 *       r1         prediction pointer; four 32-bit words are loaded with LDR,
 *                  one per row
 *       r2         pointer to a signed 16-bit DC value. May be 0 on the
 *                  non-zero path (element 0 is then left as computed).
 *                  Dereferenced WITHOUT a null check on the zero path.
 *       r3         destination pointer; four 32-bit rows are stored
 *       [sp,#0]    byte distance between prediction rows
 *       [sp,#4]    byte distance between destination rows
 *       [sp,#8]    index into the QP modulo / QP div tables
 *       [sp,#12]   path selector described above
 * Out:  r0 = 0 always
 * Stack: 0x58 bytes (r4-r12, lr, d8-d9, 0x20-byte coefficient scratch).
 * Clobbers: r1-r3, r12, flags, q0-q3 (d8-d9 are used but restored).
 *
 * NOTE(review): in the OpenMAX DL prototype of this name the arguments are
 * (ppSrc, pPred, pDC, pDst, predStep, dstStep, QP, AC); the constant names
 * below follow that naming - confirm against the project header.
 *--------------------------------------------------------------------------*/

    /* Stack frame layout, relative to sp after the prologue. */
    .equ    SCRATCH_BYTES,  0x20    /* 4x4 x 16-bit coefficient buffer      */
    .equ    ARG_PRED_STEP,  0x58    /* 0x20 scratch + 0x10 VPUSH + 0x28 PUSH */
    .equ    ARG_DST_STEP,   0x5c
    .equ    ARG_QP,         0x60
    .equ    ARG_AC,         0x64

    .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
    .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
omxVCM4P10_DequantTransformResidualFromPairAndAdd:
    /* ---- prologue ---- */
    PUSH     {r4-r12,lr}                /* 10 core registers = 0x28 bytes     */
    VPUSH    {d8-d9}                    /* q4 (d8:d9) is used as a temporary  */
    SUB      sp,sp,#SCRATCH_BYTES
    ADD      r4,sp,#0                   /* r4 = scratch coefficient buffer    */
    LDR      r5,[sp,#ARG_AC]
    MOV      r7,r1                      /* r7 = prediction read pointer       */
    MOV      r8,r2                      /* r8 = DC pointer (may be 0)         */
    MOV      r9,r3                      /* r9 = destination write pointer     */
    CMP      r5,#0
    BEQ      .Ldc_only                  /* selector == 0: DC-only shortcut    */

    /* ---- unpack coefficients into the scratch buffer ---- */
    MOV      r1,r4                      /* r0 is still the caller's r0        */
    BL       armVCM4P10_UnpackBlock4x4  /* r4, r7-r9 survive (callee-saved)   */

    /* ---- build the two rows of scale factors ---- */
    LDR      r1,[sp,#ARG_QP]
    LDR      r11, =armVCM4P10_QPModuloTable
    LDR      r10, =armVCM4P10_QPDivTable
    LDR      r2,  =armVCM4P10_VMatrixU16
    LDRSB    r12,[r11,r1]               /* r12 = QPModuloTable[QP] (signed)   */
    LDRSB    lr,[r10,r1]                /* lr  = QPDivTable[QP]    (signed)   */
    LDR      r10, =0x3020504            /* VTBL byte indices: halfwords 2,1   */
    LDR      r1, =0x5040100             /* VTBL byte indices: halfwords 0,2   */
    ADD      r2,r2,r12                  /* byte offset into VMatrixU16        */
    VDUP.32  d7,r1                      /* d7 selects halfwords 0,2,0,2       */
    VDUP.32  d9,r10                     /* d9 selects halfwords 2,1,2,1       */
    VDUP.16  d5,lr                      /* per-lane shift count               */
    VLD1.8   {d6},[r2]                  /* 8 bytes of matrix entries          */
    VTBL.8   d8,{d6},d7                 /* multipliers for d0 and d2          */
    VTBL.8   d4,{d6},d9                 /* multipliers for d1 and d3          */
    CMP      r8,#0                      /* flags stay live until VMOVNE below */
    VLD1.16  {d0,d1,d2,d3},[r4]         /* load the 16 unpacked coefficients  */
    VSHL.U16 d8,d8,d5                   /* multipliers << QPDivTable[QP]      */
    VSHL.U16 d4,d4,d5
    BEQ      .Lskip_dc_load             /* no DC pointer: nothing to load     */
    LDRSH    r10,[r8,#0]                /* r10 = signed 16-bit DC value       */
.Lskip_dc_load:
    /* ---- scale, then optionally override element 0 ---- */
    VMUL.I16 d0,d0,d8
    VMUL.I16 d1,d1,d4
    VMUL.I16 d2,d2,d8
    VMUL.I16 d3,d3,d4
    VMOVNE.16 d0[0],r10                 /* only when r8 != 0; not rescaled    */

    /* ---- pass 1: 4x4 transpose of 16-bit lanes, then butterfly ---- */
    VTRN.16  d0,d1
    VTRN.16  d2,d3
    VTRN.32  q0,q1
    VMOV.I16 d4,#0                      /* zero operand for the halving adds  */
    VADD.I16 d5,d0,d2                   /* d5 = d0 + d2                       */
    VSUB.I16 d6,d0,d2                   /* d6 = d0 - d2                       */
    VHADD.S16 d7,d1,d4                  /* d7 = d1 >> 1  ((d1 + 0) / 2)       */
    VHADD.S16 d8,d3,d4                  /* d8 = d3 >> 1                       */
    VSUB.I16 d7,d7,d3                   /* d7 = (d1 >> 1) - d3                */
    VADD.I16 d8,d1,d8                   /* d8 = d1 + (d3 >> 1)                */
    VADD.I16 d0,d5,d8
    VADD.I16 d1,d6,d7
    VSUB.I16 d2,d6,d7
    VSUB.I16 d3,d5,d8

    /* ---- pass 2: same transpose + butterfly (d4 is still zero) ---- */
    VTRN.16  d0,d1
    VTRN.16  d2,d3
    VTRN.32  q0,q1
    VADD.I16 d5,d0,d2
    VSUB.I16 d6,d0,d2
    VHADD.S16 d7,d1,d4
    VHADD.S16 d8,d3,d4
    VSUB.I16 d7,d7,d3
    VADD.I16 d8,d1,d8
    VADD.I16 d0,d5,d8
    VADD.I16 d1,d6,d7
    VSUB.I16 d2,d6,d7
    VSUB.I16 d3,d5,d8

    /* ---- final scaling: rounding arithmetic shift right by 6 ---- */
    VRSHR.S16 d0,d0,#6
    VRSHR.S16 d1,d1,#6
    VRSHR.S16 d2,d2,#6
    VRSHR.S16 d3,d3,#6
    B        .Ladd_prediction

.Ldc_only:
    /* Every residual = (DC + 32) >> 6. r8 is dereferenced unconditionally. */
    LDRSH    r10,[r8,#0]
    ADD      r10,r10,#0x20              /* rounding term for the >> 6         */
    ASR      r10,r10,#6
    VDUP.16  d0,r10
    VDUP.16  d1,r10
    VDUP.16  d2,r10
    VDUP.16  d3,r10

.Ladd_prediction:
    /* ---- q0:q1 = residual rows 0..3; add prediction and store ---- */
    LDR      r1,[sp,#ARG_PRED_STEP]
    LDR      r10,[sp,#ARG_DST_STEP]
    LDR      r3,[r7],r1                 /* prediction row 0, then advance     */
    LDR      r5,[r7],r1                 /* prediction row 1                   */
    VMOV     d4,r3,r5                   /* d4 = rows 0,1 (8 x u8)             */
    LDR      r3,[r7],r1                 /* prediction row 2                   */
    LDR      r5,[r7,#0]                 /* prediction row 3 (no advance)      */
    VMOV     d5,r3,r5                   /* d5 = rows 2,3 (8 x u8)             */
    VADDW.U8 q3,q0,d4                   /* 16-bit residual + widened u8 pred  */
    VADDW.U8 q4,q1,d5                   /* q4 = d8:d9, saved in the prologue  */
    VQMOVUN.S16 d0,q3                   /* saturate signed 16 -> unsigned 8   */
    VQMOVUN.S16 d1,q4
    VST1.32  {d0[0]},[r9],r10           /* destination row 0, then advance    */
    VST1.32  {d0[1]},[r9],r10           /* row 1                              */
    VST1.32  {d1[0]},[r9],r10           /* row 2                              */
    VST1.32  {d1[1]},[r9]               /* row 3                              */

    /* ---- epilogue ---- */
    MOV      r0,#0                      /* return value is always 0           */
    ADD      sp,sp,#SCRATCH_BYTES
    VPOP     {d8-d9}
    POP      {r4-r12,pc}                /* restore and return                 */
    .endfunc

/* End of this source file; the assembler does not process anything below. */
    .end
