/* omxVCM4P10_PredictIntra_4x4_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * Assembler setup for this file.
 *
 * Build attributes 24 and 25 are the EABI 8-byte-alignment tags
 * (Tag_ABI_align_needed / Tag_ABI_align_preserved in the ARM ABI
 * addenda); both are set to 1 here.
 */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    /* ARM (not Thumb) encoding, NEON instructions enabled. */
    .arm
    .fpu neon

    /* Code and the jump table below both live in .text.  On ARM targets
     * the .align operand is a power of two, so this is 16-byte alignment. */
    .text
    .align 4

/*
 * Jump table for omxVCM4P10_PredictIntra_4x4, indexed by the prediction
 * mode number (0..8, in the order listed below).
 *
 * Each entry is the signed distance from (P0+8) to a mode handler.  P0 is
 * the "ADD pc, r8" dispatch instruction; in ARM state a read of pc yields
 * the address of the current instruction plus 8, so adding an entry to pc
 * at P0 lands exactly on the handler.  Storing offsets instead of absolute
 * addresses keeps the table position independent.
 *
 *   index 0 VERT      index 1 HOR       index 2 DC
 *   index 3 DIAG_DL   index 4 DIAG_DR   index 5 VR
 *   index 6 HD        index 7 VL        index 8 HU
 */
armVCM4P10_pSwitchTable4x4:
    .word OMX_VC_4x4_VERT-(P0+8),     OMX_VC_4x4_HOR-(P0+8)
    .word OMX_VC_4x4_DC-(P0+8),       OMX_VC_4x4_DIAG_DL-(P0+8)
    .word OMX_VC_4x4_DIAG_DR-(P0+8),  OMX_VC_4x4_VR-(P0+8)
    .word OMX_VC_4x4_HD-(P0+8),       OMX_VC_4x4_VL-(P0+8)
    .word OMX_VC_4x4_HU-(P0+8)

/*
 * omxVCM4P10_PredictIntra_4x4
 *
 * Fills one 4x4 block of bytes at the destination with a prediction built
 * from neighbouring bytes.  The prediction mode selects one of nine
 * handlers through armVCM4P10_pSwitchTable4x4.
 *
 * Arguments, as this code uses them:
 *   r0        left-neighbour column: one byte is read per row, rows are
 *             r4 bytes apart (L0 = first row ... L3 = fourth row)
 *   r1        above-neighbour row: 4 bytes (A0..A3) are read; DIAG_DL and
 *             VL read 8 bytes (A0..A7) when availability bit 0x40 is set
 *   r2        one above-left byte (AL)
 *   r3        destination: four rows of 4 bytes, rows r5 bytes apart
 *   [sp,#0]   left step in bytes          -> r4
 *   [sp,#4]   destination step in bytes   -> r5
 *   [sp,#8]   prediction mode index 0..8  -> r6
 *   [sp,#12]  availability bit mask       -> r7
 *               bit 0x01 : DC uses the above row
 *               bit 0x02 : DC uses the left column
 *               bit 0x40 : DIAG_DL / VL read A4..A7 instead of repeating A3
 *   NOTE(review): these presumably correspond to pSrcLeft, pSrcAbove,
 *   pSrcAboveLeft, pDst, leftStep, dstStep, predMode and availability of
 *   the OpenMAX DL API, and the bits to OMX_VC_UPPER / OMX_VC_LEFT /
 *   OMX_VC_UPPER_RIGHT - confirm against the OpenMAX DL headers.
 *
 * Returns r0 = 0 on every path.
 *
 * Preserves r4-r12 and d8-d12 (saved and restored here); d0-d7 are
 * overwritten.
 *
 * NOTE: the mode index in r6 is not range-checked; a value outside 0..8
 * reads past the end of the jump table.
 *
 * Notation used in the comments below:
 *   AVG2(a,b)  = (a + b + 1) >> 1           one VRHADD.U8
 *   F3(a,b,c)  = (a + 2*b + c + 2) >> 2     VHADD.U8 of a and c, followed
 *                                           by VRHADD.U8 with b
 */
    .global omxVCM4P10_PredictIntra_4x4
    .func   omxVCM4P10_PredictIntra_4x4
omxVCM4P10_PredictIntra_4x4:
    PUSH     {r4-r12,lr}                /* 10 core registers = 40 bytes   */
    VPUSH    {d8-d12}                   /*  5 D registers    = 40 bytes   */
    ADR      r8, armVCM4P10_pSwitchTable4x4
    /* 80 (0x50) bytes were pushed, so the stack arguments start at
     * sp+0x50. */
    LDRD     r6,r7,[sp,#0x58]           /* r6 = mode, r7 = availability   */
    LDRD     r4,r5,[sp,#0x50]           /* r4 = left step, r5 = dst step  */
    LDR      r8,[r8,r6,LSL #2]          /* r8 = table[mode]               */
P0: ADD      pc, r8                     /* pc reads as P0+8: jump to mode */

/* ---- mode 1: every row is its left neighbour repeated four times ---- */
OMX_VC_4x4_HOR:
    ADD      r9,r0,r4                   /* r9  -> left byte of row 1      */
    ADD      r10,r4,r4                  /* r10 = 2 * left step            */
    VLD1.8   {d0[]},[r0],r10            /* d0 = L0 in all lanes           */
    VLD1.8   {d1[]},[r9],r10            /* d1 = L1 in all lanes           */
    VLD1.8   {d2[]},[r0]                /* d2 = L2 in all lanes           */
    VLD1.8   {d3[]},[r9]                /* d3 = L3 in all lanes           */
    ADD      r11,r3,r5                  /* r11 -> dst row 1               */
    ADD      r12,r5,r5                  /* r12 = 2 * dst step             */
    VST1.32  {d0[0]},[r3],r12           /* row 0 */
    VST1.32  {d1[0]},[r11],r12          /* row 1 */
    VST1.32  {d2[0]},[r3]               /* row 2 */
    VST1.32  {d3[0]},[r11]              /* row 3 */
    B        L0x348

/* ---- mode 0: every row is a copy of the four above bytes ---- */
OMX_VC_4x4_VERT:
    VLD1.32  {d0[0]},[r1]               /* d0[0..3] = A0..A3              */
    ADD      r11,r3,r5                  /* r11 -> dst row 1               */
    ADD      r12,r5,r5                  /* r12 = 2 * dst step             */
/* Shared tail: write the low 4 bytes of d0 to all four rows.
 * Expects r11 = r3 + r5 and r12 = 2 * r5. */
L0x58:
    VST1.32  {d0[0]},[r3],r12
    VST1.32  {d0[0]},[r11],r12
    VST1.32  {d0[0]},[r3]
    VST1.32  {d0[0]},[r11]
    B        L0x348

/* ---- mode 2: whole block = rounded mean of the available neighbours ---- */
OMX_VC_4x4_DC:
    TST      r7,#2
    BEQ      L0xdc                      /* left column not flagged        */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d0[0]},[r0],r10           /* d0[0..3] = L0..L3              */
    VLD1.8   {d0[1]},[r9],r10
    VLD1.8   {d0[2]},[r0]
    VLD1.8   {d0[3]},[r9]
    TST      r7,#1
    BEQ      L0xbc                      /* above row not flagged          */
    /* Left and above: (sum of 8 bytes + 4) >> 3. */
    VLD1.32  {d0[1]},[r1]               /* d0[4..7] = A0..A3              */
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VPADDL.U32 d1,d1                    /* d1 = sum of all 8 bytes        */
    VRSHR.U64 d1,d1,#3
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]                   /* replicate the mean             */
    B        L0x58
/* Left only: (L0+L1+L2+L3 + 2) >> 2.  Only the low 32-bit lane of d1 is
 * used, so the unloaded upper half of d0 does not matter. */
L0xbc:
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VRSHR.U32 d1,d1,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        L0x58
L0xdc:
    TST      r7,#1
    BEQ      L0x108                     /* neither neighbour flagged      */
    /* Above only: (A0+A1+A2+A3 + 2) >> 2. */
    VLD1.32  {d0[0]},[r1]
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VRSHR.U32 d1,d1,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        L0x58
/* No neighbours: fill the block with the constant 0x80. */
L0x108:
    VMOV.I8  d0,#0x80
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    B        L0x58

/* ---- mode 3: diagonal down-left, built from the above row only ---- */
OMX_VC_4x4_DIAG_DL:
    TST      r7,#0x40
    BEQ      L0x138
    VLD1.8   {d3},[r1]                  /* d3 = A0..A7                    */
    VDUP.8   d2,d3[7]                   /* d2 = A7 in all lanes           */
    VEXT.8   d4,d3,d2,#1                /* d4 = A1..A7,A7                 */
    VEXT.8   d5,d3,d2,#2                /* d5 = A2..A7,A7,A7              */
    B        L0x14c
/* Bit 0x40 clear: only A0..A3 are read and A3 stands in for A4..A7. */
L0x138:
    VLD1.32  {d0[1]},[r1]               /* d0[4..7] = A0..A3              */
    VDUP.8   d2,d0[7]                   /* d2 = A3 in all lanes           */
    VEXT.8   d3,d0,d2,#4                /* d3 = A0..A3,A3,A3,A3,A3        */
    VEXT.8   d4,d0,d2,#5                /* d4 = A1..A3,A3,...             */
    VEXT.8   d5,d0,d2,#6                /* d5 = A2,A3,A3,...              */
L0x14c:
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                  /* d6[i] = F3(d3[i],d4[i],d5[i])  */
    /* Row n is d6[n..n+3]: store, then rotate d6 down by one byte. */
    VST1.32  {d6[0]},[r3],r5
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3],r5
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3],r5
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3]
    B        L0x348

/* ---- mode 4: diagonal down-right, from left, above-left and above ---- */
OMX_VC_4x4_DIAG_DR:
    VLD1.32  {d0[0]},[r1]               /* d0[0..3] = A0..A3              */
    VLD1.8   {d1[7]},[r2]               /* d1[7] = AL                     */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    ADD      r1,r3,r5                   /* r1 -> dst row 1 (r1 is free)   */
    VLD1.8   {d1[6]},[r0],r10           /* d1[3..7] = L3,L2,L1,L0,AL      */
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[4]},[r0]
    VLD1.8   {d1[3]},[r9]
    /* d1:d0 now holds the neighbours as one run L3,L2,L1,L0,AL,A0..A3;
     * d3/d4/d5 are that run starting at L3, L2 and L1 respectively. */
    VEXT.8   d3,d1,d0,#3
    ADD      r4,r1,r5                   /* r4 -> dst row 2                */
    VEXT.8   d4,d1,d0,#4
    ADD      r6,r4,r5                   /* r6 -> dst row 3                */
    VEXT.8   d5,d1,d0,#5
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                  /* d6[i] = F3 along the run       */
    /* Rows are written bottom-up: row 3 = d6[0..3] ... row 0 = d6[3..6]. */
    VST1.32  {d6[0]},[r6]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r4]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r1]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3]
    B        L0x348

/* ---- mode 5: vertical-right; the fourth left byte (L3) is not read ---- */
OMX_VC_4x4_VR:
    VLD1.32  {d0[0]},[r1]               /* d0[0..3] = A0..A3              */
    VLD1.8   {d0[7]},[r2]               /* d0[7] = AL                     */
    VLD1.8   {d1[7]},[r0],r4            /* d1[7] = L0                     */
    VLD1.8   {d2[7]},[r0],r4            /* d2[7] = L1                     */
    VLD1.8   {d1[6]},[r0]               /* d1[6] = L2                     */
    VEXT.8   d12,d0,d0,#7               /* d12 = AL,A0,A1,A2,A3,...       */
    VEXT.8   d3,d1,d12,#6               /* d3  = L2,L0,AL,A0,A1,A2,...    */
    VEXT.8   d4,d2,d12,#7               /* d4  = L1,AL,A0,A1,A2,A3,...    */
    VEXT.8   d5,d1,d0,#7                /* d5  = L0,A0,A1,A2,A3,...       */
    VEXT.8   d6,d2,d0,#7                /* d6  = L1,A0,A1,A2,A3,...       */
    VEXT.8   d11,d1,d12,#7              /* d11 = L0,AL,A0,A1,A2,A3,...    */
    /* d8[0] = F3(L1,L0,AL).  In lanes 1..4 d6 and d12 hold the same byte,
     * so VHADD leaves it unchanged and VRHADD yields AVG2 with d11. */
    VHADD.U8 d8,d6,d12
    VRHADD.U8 d8,d8,d11
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  /* d7[i] = F3(d3[i],d4[i],d5[i])  */
    VEXT.8   d10,d8,d8,#1               /* d10 = d8 rotated down one byte */
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VEXT.8   d9,d7,d7,#1                /* d9  = d7 rotated down one byte */
    VST1.32  {d10[0]},[r3],r12          /* row 0 */
    VST1.32  {d9[0]},[r11],r12          /* row 1 */
    VST1.32  {d8[0]},[r3],r12           /* row 2 */
    VST1.32  {d7[0]},[r11]              /* row 3 */
    B        L0x348

/* ---- mode 6: horizontal-down, from left, above-left and above ---- */
OMX_VC_4x4_HD:
    /* Only A0..A2 reach the output, so load just the 4 above bytes (as
     * DIAG_DR and VR do) rather than 8; this avoids reading past the
     * above row. */
    VLD1.32  {d0[0]},[r1]               /* d0[0..3] = A0..A3              */
    VLD1.8   {d1[7]},[r2]               /* d1[7] = AL                     */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[6]},[r0],r10           /* d1[3..7] = L3,L2,L1,L0,AL      */
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[4]},[r0]
    VLD1.8   {d1[3]},[r9]
    VEXT.8   d3,d1,d0,#3                /* d3 = L3,L2,L1,L0,AL,A0,A1,A2   */
    VEXT.8   d4,d1,d0,#2                /* d4 =  x,L3,L2,L1,L0,AL,A0,A1   */
    VEXT.8   d5,d1,d0,#1                /* d5 =  x, x,L3,L2,L1,L0,AL,A0   */
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  /* d7 = F3 values                 */
    VRHADD.U8 d8,d4,d3                  /* d8 = AVG2 values               */
    /* Shift the two vectors so the wanted lanes line up, then interleave
     * AVG2 and F3 bytes with VZIP and splice in the two above-row F3
     * values (d7[6], d7[7]) that belong at the end of row 0. */
    VSHL.I64 d8,d8,#24
    VSHL.I64 d6,d7,#16
    VZIP.8   d8,d6
    VEXT.8   d7,d7,d7,#6
    VEXT.8   d8,d6,d7,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VST1.32  {d8[1]},[r3],r12           /* row 0 */
    VST1.32  {d6[1]},[r11],r12          /* row 1 */
    VST1.32  {d8[0]},[r3]               /* row 2 */
    VST1.32  {d6[0]},[r11]              /* row 3 */
    B        L0x348

/* ---- mode 7: vertical-left, built from the above row only ---- */
OMX_VC_4x4_VL:
    TST      r7,#0x40
    BEQ      L0x2b4
    VLD1.8   {d3},[r1]                  /* d3 = A0..A7                    */
    VEXT.8   d4,d3,d3,#1                /* d4 = A1..A7,A0 (rotate)        */
    VEXT.8   d5,d4,d4,#1                /* d5 = A2..A7,A0,A1 (rotate)     */
    B        L0x2c8
/* Bit 0x40 clear: only A0..A3 are read and A3 stands in for A4..A7. */
L0x2b4:
    VLD1.32  {d0[1]},[r1]               /* d0[4..7] = A0..A3              */
    VDUP.8   d2,d0[7]                   /* d2 = A3 in all lanes           */
    VEXT.8   d3,d0,d2,#4                /* d3 = A0..A3,A3,...             */
    VEXT.8   d4,d0,d2,#5                /* d4 = A1..A3,A3,...             */
    VEXT.8   d5,d0,d2,#6                /* d5 = A2,A3,A3,...              */
L0x2c8:
    VRHADD.U8 d7,d4,d3                  /* d7[i]  = AVG2(d3[i],d4[i])     */
    VHADD.U8 d10,d3,d5
    VRHADD.U8 d10,d10,d4                /* d10[i] = F3(d3[i],d4[i],d5[i]) */
    VEXT.8   d8,d7,d7,#1                /* d8 = d7 rotated down one byte  */
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VEXT.8   d9,d10,d8,#1               /* d9[0..3] = d10[1..4]           */
    VST1.32  {d7[0]},[r3],r12           /* row 0 = AVG2 lanes 0..3        */
    VST1.32  {d10[0]},[r11],r12         /* row 1 = F3   lanes 0..3        */
    VST1.32  {d8[0]},[r3]               /* row 2 = AVG2 lanes 1..4        */
    VST1.32  {d9[0]},[r11]              /* row 3 = F3   lanes 1..4        */
    B        L0x348

/* ---- mode 8: horizontal-up, built from the left column only ---- */
OMX_VC_4x4_HU:
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[4]},[r0],r10           /* d1[4..7] = L0..L3              */
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[6]},[r0]
    VLD1.8   {d1[7]},[r9]
    VDUP.8   d2,d1[7]                   /* d2 = L3 in all lanes           */
    VEXT.8   d3,d1,d2,#4                /* d3 = L0,L1,L2,L3,L3,...        */
    VEXT.8   d4,d1,d2,#5                /* d4 = L1,L2,L3,L3,...           */
    VEXT.8   d5,d1,d2,#6                /* d5 = L2,L3,L3,...              */
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  /* d7 = F3 values                 */
    VRHADD.U8 d8,d4,d3                  /* d8 = AVG2 values               */
    VZIP.8   d8,d7                      /* d8 = AVG2/F3 interleaved, lanes
                                         * 0..3; d7 = the same for lanes
                                         * 4..7 (all equal to L3)         */
    /* Rows 0..2 step through d8 two bytes at a time; row 3 is d7[0..3]. */
    VST1.32  {d8[0]},[r3],r5
    VEXT.8   d8,d8,d8,#2
    VST1.32  {d8[0]},[r3],r5
    VEXT.8   d8,d8,d8,#2
    VST1.32  {d8[0]},[r3],r5
    VST1.32  {d7[0]},[r3]

/* Common exit for every mode: return 0 and restore the saved registers. */
L0x348:
    MOV      r0,#0
    VPOP     {d8-d12}
    POP      {r4-r12,pc}
    .endfunc

    .end
