omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * File-level assembler setup (GNU as, ARM 32-bit).
 *
 * Build attributes 24 and 25 are Tag_ABI_align_needed and
 * Tag_ABI_align_preserved in the ARM EABI attribute list; setting both to 1
 * records that this object needs, and keeps, 8-byte stack alignment.
 */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm                /* ARM (not Thumb) instruction encoding */
    .fpu neon           /* enable the NEON mnemonics used in this file */
    .text

/*
 * omxVCM4P10_DequantTransformResidualFromPairAndAdd
 *
 * C prototype as given by the OpenMAX DL specification (confirm against the
 * project headers, which are not part of this file):
 *
 *   OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd(
 *       const OMX_U8 **ppSrc, const OMX_U8 *pPred, const OMX_S16 *pDC,
 *       OMX_U8 *pDst, OMX_INT predStep, OMX_INT dstStep,
 *       OMX_INT QP, OMX_INT AC);
 *
 * In (AAPCS):
 *   r0        ppSrc     passed through unchanged to armVCM4P10_UnpackBlock4x4
 *   r1        pPred     4 rows of 4 bytes, rows predStep bytes apart
 *   r2        pDC       signed 16-bit value; may be NULL only when AC != 0
 *   r3        pDst      4 rows of 4 bytes are written, rows dstStep bytes apart
 *   [stack]   predStep, dstStep, QP, AC (in that order)
 * Out:
 *   r0 = 0 (presumably OMX_Sts_NoErr; the constant is not defined here)
 *
 * What the code does:
 *   AC != 0: unpack 16 coefficients into a 32-byte stack block, scale them
 *            with per-QP factors, optionally replace coefficient 0 with *pDC,
 *            run two transpose + butterfly passes and round with (x + 32) >> 6.
 *   AC == 0: every residual sample is (*pDC + 32) >> 6.
 *   Both:    add the residual to the 4x4 bytes at pPred, saturate to 0..255
 *            and store the 4x4 result at pDst.
 *
 * Register roles after the prologue (all callee-saved, so they survive the
 * call to armVCM4P10_UnpackBlock4x4):
 *   r4 = scratch block on the stack, r7 = pPred, r8 = pDC, r9 = pDst.
 * d8-d9 (q4) are callee-saved NEON registers and are saved/restored here
 * because they are used as temporaries.
 */

/* Stack frame layout, in bytes. */
    .equ    DTR_SCRATCH_BYTES, 0x20         /* 16 x 16-bit coefficients       */
    .equ    DTR_SAVED_BYTES,   0x38         /* r4-r12,lr (40) + d8-d9 (16)    */
    .equ    DTR_ARG_BASE,      (DTR_SCRATCH_BYTES + DTR_SAVED_BYTES)
    .equ    DTR_ARG_PRED_STEP, (DTR_ARG_BASE + 0x0)
    .equ    DTR_ARG_DST_STEP,  (DTR_ARG_BASE + 0x4)
    .equ    DTR_ARG_QP,        (DTR_ARG_BASE + 0x8)
    .equ    DTR_ARG_AC,        (DTR_ARG_BASE + 0xc)

/*
 * One pass of the 4x4 transform on d0-d3.
 * Requires d4 == 0. Clobbers d5-d8.
 *
 * The three VTRN instructions transpose the 4x4 block of 16-bit lanes held
 * in d0-d3. With x0..x3 denoting d0..d3 after the transpose, the butterfly
 * computes, lane by lane:
 *     e0 = x0 + x2          e1 = x0 - x2
 *     e2 = (x1 >> 1) - x3   e3 = x1 + (x3 >> 1)
 *     d0 = e0 + e3   d1 = e1 + e2   d2 = e1 - e2   d3 = e0 - e3
 * The halving is done with VHADD against the zero vector in d4.
 */
    .macro  DTR_TRANSFORM_PASS
    VTRN.16  d0,d1
    VTRN.16  d2,d3
    VTRN.32  q0,q1
    VADD.I16 d5,d0,d2
    VSUB.I16 d6,d0,d2
    VHADD.S16 d7,d1,d4
    VHADD.S16 d8,d3,d4
    VSUB.I16 d7,d7,d3
    VADD.I16 d8,d1,d8
    VADD.I16 d0,d5,d8
    VADD.I16 d1,d6,d7
    VSUB.I16 d2,d6,d7
    VSUB.I16 d3,d5,d8
    .endm

    .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
    .type   omxVCM4P10_DequantTransformResidualFromPairAndAdd, %function
    .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
omxVCM4P10_DequantTransformResidualFromPairAndAdd:
    PUSH     {r4-r12,lr}
    VPUSH    {d8-d9}
    SUB      sp,sp,#DTR_SCRATCH_BYTES
    ADD      r4,sp,#0                   /* r4 = scratch coefficient block */
    LDR      r5,[sp,#DTR_ARG_AC]
    MOV      r7,r1                      /* r7 = pPred */
    MOV      r8,r2                      /* r8 = pDC   */
    MOV      r9,r3                      /* r9 = pDst  */
    CMP      r5,#0
    BEQ      .Ldtr_dc_only              /* AC == 0: residual comes from *pDC alone */

    /*
     * AC path. armVCM4P10_UnpackBlock4x4 is defined elsewhere; it is called
     * with r0 = ppSrc (still untouched) and r1 = scratch block, and the 16
     * halfwords it leaves in the scratch block are loaded below.
     */
    MOV      r1,r4
    BL       armVCM4P10_UnpackBlock4x4

    /* Position-independent addresses of the three lookup tables: each literal
     * word holds (table - (label + 8)) and pc reads as label + 8 in ARM mode. */
    LDR      r1,[sp,#DTR_ARG_QP]
    LDR      r11, .LarmVCM4P10_QPModuloTable
.Ldtr_pic0:
    ADD      r11, pc
    LDR      r10, .LarmVCM4P10_QPDivTable
.Ldtr_pic1:
    ADD      r10, pc
    LDR      r2, .LarmVCM4P10_VMatrixU16
.Ldtr_pic2:
    ADD      r2, pc

    LDRSB    r12,[r11,r1]               /* r12 = QPModuloTable[QP]: byte offset into VMatrixU16 */
    LDRSB    lr,[r10,r1]                /* lr  = QPDivTable[QP]: per-lane shift count */
    LDR      r10, =0x3020504            /* VTBL byte indices picking halfwords 2,1 */
    LDR      r1, =0x5040100             /* VTBL byte indices picking halfwords 0,2 */
    ADD      r2,r2,r12
    VDUP.32  d7,r1
    VDUP.32  d9,r10
    VDUP.16  d5,lr
    VLD1.8   {d6},[r2]                  /* d6 = 4 halfwords h0..h3 of the selected matrix entry */
    VTBL.8   d8,{d6},d7                 /* d8 = {h0,h2,h0,h2}: scale for rows 0 and 2 */
    VTBL.8   d4,{d6},d9                 /* d4 = {h2,h1,h2,h1}: scale for rows 1 and 3 */
    CMP      r8,#0                      /* flags stay live until the VMOVNE below */
    VLD1.16  {d0,d1,d2,d3},[r4]         /* d0..d3 = coefficient rows 0..3 */
    VSHL.U16 d8,d8,d5
    VSHL.U16 d4,d4,d5
    BEQ      .Ldtr_scale                /* pDC == NULL: keep the unpacked coefficient 0 */
    LDRSH    r10,[r8,#0]                /* r10 = *pDC */
.Ldtr_scale:
    VMUL.I16 d0,d0,d8
    VMUL.I16 d1,d1,d4
    VMUL.I16 d2,d2,d8
    VMUL.I16 d3,d3,d4
    VMOVNE.16 d0[0],r10                 /* pDC != NULL: coefficient 0 = *pDC, not scaled here */

    VMOV.I16 d4,#0                      /* zero vector required by DTR_TRANSFORM_PASS */
    DTR_TRANSFORM_PASS
    DTR_TRANSFORM_PASS

    VRSHR.S16 d0,d0,#6                  /* rounding shift: (x + 32) >> 6 */
    VRSHR.S16 d1,d1,#6
    VRSHR.S16 d2,d2,#6
    VRSHR.S16 d3,d3,#6
    B        .Ldtr_add_pred

.Ldtr_dc_only:
    /* Every residual sample is (*pDC + 32) >> 6; pDC is dereferenced without
     * a NULL check on this path. */
    LDRSH    r10,[r8,#0]
    ADD      r10,r10,#0x20
    ASR      r10,r10,#6
    VDUP.16  d0,r10
    VDUP.16  d1,r10
    VDUP.16  d2,r10
    VDUP.16  d3,r10

.Ldtr_add_pred:
    /* d0..d3 = residual rows 0..3 (signed 16-bit). Add the prediction bytes,
     * saturate to unsigned 8-bit and store one 32-bit row at a time. */
    LDR      r1,[sp,#DTR_ARG_PRED_STEP]
    LDR      r10,[sp,#DTR_ARG_DST_STEP]
    LDR      r3,[r7],r1                 /* prediction row 0 */
    LDR      r5,[r7],r1                 /* prediction row 1 */
    VMOV     d4,r3,r5
    LDR      r3,[r7],r1                 /* prediction row 2 */
    LDR      r5,[r7,#0]                 /* prediction row 3 */
    VMOV     d5,r3,r5
    VADDW.U8 q3,q0,d4                   /* rows 0,1: residual + zero-extended prediction */
    VADDW.U8 q4,q1,d5                   /* rows 2,3 */
    VQMOVUN.S16 d0,q3                   /* saturate signed 16-bit to unsigned 8-bit */
    VQMOVUN.S16 d1,q4
    VST1.32  {d0[0]},[r9],r10
    VST1.32  {d0[1]},[r9],r10
    VST1.32  {d1[0]},[r9],r10
    VST1.32  {d1[1]},[r9]
    MOV      r0,#0
    ADD      sp,sp,#DTR_SCRATCH_BYTES
    VPOP     {d8-d9}
    POP      {r4-r12,pc}
    .endfunc
    .size   omxVCM4P10_DequantTransformResidualFromPairAndAdd, .-omxVCM4P10_DequantTransformResidualFromPairAndAdd

/* PC-relative offsets to the lookup tables (defined in another object). */
.LarmVCM4P10_QPModuloTable:
    .word   armVCM4P10_QPModuloTable-(.Ldtr_pic0+8)
.LarmVCM4P10_QPDivTable:
    .word   armVCM4P10_QPDivTable-(.Ldtr_pic1+8)
.LarmVCM4P10_VMatrixU16:
    .word   armVCM4P10_VMatrixU16-(.Ldtr_pic2+8)

    .end