/* omxVCM4P10_PredictIntra_4x4_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * Assembler preamble.
 *
 * Build attributes 24 and 25 are Tag_ABI_align_needed and
 * Tag_ABI_align_preserved in the ARM EABI; both are set to 1 here.
 * The code is assembled as ARM (A32) instructions with the NEON
 * instruction set enabled, and is placed in .text on a 16-byte boundary.
 */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon

    .text
    /* Same effect as ".align 4" on ARM (2^4 bytes), but spelled explicitly. */
    .p2align 4

/*
 * Dispatch table for omxVCM4P10_PredictIntra_4x4, indexed by the mode
 * number (0..8, one 32-bit word per mode):
 *   0 VERT, 1 HOR, 2 DC, 3 DIAG_DL, 4 DIAG_DR, 5 VR, 6 HD, 7 VL, 8 HU
 *
 * Each entry is the distance from (P0 + 8) to the handler. P0 is the
 * "ADD pc, r8" instruction; in ARM state pc reads as the address of the
 * current instruction plus 8, so adding an entry to pc at P0 lands on
 * the handler. Storing offsets keeps the table position independent.
 */
armVCM4P10_pSwitchTable4x4:
    .word OMX_VC_4x4_VERT-(P0+8),     OMX_VC_4x4_HOR-(P0+8)
    .word OMX_VC_4x4_DC-(P0+8),       OMX_VC_4x4_DIAG_DL-(P0+8)
    .word OMX_VC_4x4_DIAG_DR-(P0+8),  OMX_VC_4x4_VR-(P0+8)
    .word OMX_VC_4x4_HD-(P0+8),       OMX_VC_4x4_VL-(P0+8)
    .word OMX_VC_4x4_HU-(P0+8)

/*
 * omxVCM4P10_PredictIntra_4x4 -- build one 4x4 block of intra-predicted
 * pixels from its neighbours. ARM-state code using NEON.
 *
 * Inputs, described by how the code below uses them. (They presumably map
 * to the OpenMAX DL parameters pSrcLeft, pSrcAbove, pSrcAboveLeft, pDst,
 * leftStep, dstStep, predMode, availability -- confirm against the header.)
 *   r0           pointer to the left neighbour column, one byte read per row
 *   r1           pointer to the row of neighbour pixels above the block
 *   r2           pointer to the single above-left neighbour pixel
 *   r3           destination; four bytes are written per row
 *   stack arg 0  (r4) byte stride between successive left-column pixels
 *   stack arg 1  (r5) byte stride between successive destination rows
 *   stack arg 2  (r6) mode index into armVCM4P10_pSwitchTable4x4:
 *                0 VERT, 1 HOR, 2 DC, 3 DIAG_DL, 4 DIAG_DR, 5 VR, 6 HD,
 *                7 VL, 8 HU
 *   stack arg 3  (r7) neighbour flags tested by the code:
 *                0x01 gates reads through r1 in DC mode,
 *                0x02 gates reads through r0 in DC mode,
 *                0x40 selects an 8-byte (rather than 4-byte) read through
 *                     r1 in DIAG_DL and VL modes.
 *
 * Returns r0 = 0 on every path.
 *
 * Preserves r4-r12 and d8-d12 (saved and restored here).
 *
 * NOTE: the mode index is not range-checked. A value outside 0..8 loads a
 * word from beyond the table and adds it to pc.
 *
 * The 3-tap filter used by the directional modes is written as
 *     VHADD.U8  t, a, c
 *     VRHADD.U8 t, t, b
 * which yields (a + 2*b + c + 2) >> 2 per byte lane without widening.
 * Lanes that were fed from never-loaded register bytes are computed too,
 * but are never stored.
 */
    .global omxVCM4P10_PredictIntra_4x4
    /* Mark the symbol as code so the linker can apply ARM/Thumb interworking. */
    .type   omxVCM4P10_PredictIntra_4x4, %function
omxVCM4P10_PredictIntra_4x4:
    PUSH     {r4-r12,lr}                    /* 10 words  = 0x28 bytes */
    VPUSH    {d8-d12}                       /* 5 D regs  = 0x28 bytes; stack args now start at sp+0x50 */
    ADR      r8, armVCM4P10_pSwitchTable4x4
    LDRD     r6,r7,[sp,#0x58]               /* r6 = mode index, r7 = neighbour flags */
    LDRD     r4,r5,[sp,#0x50]               /* r4 = left stride, r5 = destination stride */
    LDR      r8,[r8,r6,LSL #2]              /* r8 = handler - (P0 + 8) */
P0: ADD      pc, r8                         /* pc reads as P0 + 8, so this jumps to the handler */

/* Mode 1: every row is its left neighbour replicated four times. */
OMX_VC_4x4_HOR:
    ADD      r9,r0,r4                       /* r9  = left pointer for odd rows */
    ADD      r10,r4,r4                      /* r10 = two left strides */
    VLD1.8   {d0[]},[r0],r10                /* load-and-replicate: row 0 */
    VLD1.8   {d1[]},[r9],r10                /* row 1 */
    VLD1.8   {d2[]},[r0]                    /* row 2 */
    VLD1.8   {d3[]},[r9]                    /* row 3 */
    ADD      r11,r3,r5                      /* r11 = destination pointer for odd rows */
    ADD      r12,r5,r5                      /* r12 = two destination strides */
    VST1.32  {d0[0]},[r3],r12
    VST1.32  {d1[0]},[r11],r12
    VST1.32  {d2[0]},[r3]
    VST1.32  {d3[0]},[r11]
    B        .Lreturn_ok

/* Mode 0: every row is a copy of the four pixels above. */
OMX_VC_4x4_VERT:
    VLD1.32  {d0[0]},[r1]
    ADD      r11,r3,r5
    ADD      r12,r5,r5
/* Shared tail: write the low word of d0 to all four rows.
 * Expects r11 = r3 + stride and r12 = 2 * stride. */
.Lstore_d0_rows:
    VST1.32  {d0[0]},[r3],r12
    VST1.32  {d0[0]},[r11],r12
    VST1.32  {d0[0]},[r3]
    VST1.32  {d0[0]},[r11]
    B        .Lreturn_ok

/* Mode 2: fill the block with the rounded mean of whichever neighbours
 * the flags in r7 allow, or with 0x80 when neither flag is set. */
OMX_VC_4x4_DC:
    TST      r7,#2
    BEQ      .Ldc_no_left
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d0[0]},[r0],r10               /* four left pixels into d0 bytes 0..3 */
    VLD1.8   {d0[1]},[r9],r10
    VLD1.8   {d0[2]},[r0]
    VLD1.8   {d0[3]},[r9]
    TST      r7,#1
    BEQ      .Ldc_left_only
    /* Left and above: sum of 8 bytes, then (sum + 4) >> 3. */
    VLD1.32  {d0[1]},[r1]                   /* four above pixels into d0 bytes 4..7 */
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VPADDL.U32 d1,d1
    VRSHR.U64 d1,d1,#3
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        .Lstore_d0_rows
.Ldc_left_only:
    /* Left only: the low 32-bit lane holds the sum of d0 bytes 0..3;
     * result is (sum + 2) >> 2. */
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VRSHR.U32 d1,d1,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        .Lstore_d0_rows
.Ldc_no_left:
    TST      r7,#1
    BEQ      .Ldc_none
    /* Above only: same reduction over the four above pixels. */
    VLD1.32  {d0[0]},[r1]
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VRSHR.U32 d1,d1,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        .Lstore_d0_rows
.Ldc_none:
    VMOV.I8  d0,#0x80                       /* neither flag set: constant fill */
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    B        .Lstore_d0_rows

/* Mode 3: 3-tap filter along the above row; each row down starts one
 * filtered sample further right. */
OMX_VC_4x4_DIAG_DL:
    TST      r7,#0x40
    BEQ      .Ldl_no_upper_right
    VLD1.8   {d3},[r1]                      /* eight above pixels */
    VDUP.8   d2,d3[7]                       /* pad with the last pixel */
    VEXT.8   d4,d3,d2,#1                    /* window shifted by one */
    VEXT.8   d5,d3,d2,#2                    /* window shifted by two */
    B        .Ldl_filter
.Ldl_no_upper_right:
    VLD1.32  {d0[1]},[r1]                   /* four above pixels into d0 bytes 4..7 */
    VDUP.8   d2,d0[7]                       /* pad with the fourth pixel */
    VEXT.8   d3,d0,d2,#4
    VEXT.8   d4,d0,d2,#5
    VEXT.8   d5,d0,d2,#6
.Ldl_filter:
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                      /* d6 = (d3 + 2*d4 + d5 + 2) >> 2 */
    VST1.32  {d6[0]},[r3],r5
    VEXT.8   d6,d6,d6,#1                    /* rotate by one byte for the next row */
    VST1.32  {d6[0]},[r3],r5
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3],r5
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3]
    B        .Lreturn_ok

/* Mode 4: 3-tap filter along the edge running from the bottom-left pixel,
 * up the left column, through the above-left pixel, and along the above
 * row. Rows are stored bottom-up. */
OMX_VC_4x4_DIAG_DR:
    VLD1.32  {d0[0]},[r1]                   /* d0 bytes 0..3 = above pixels */
    VLD1.8   {d1[7]},[r2]                   /* d1 byte 7 = above-left pixel */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    ADD      r1,r3,r5                       /* r1 reused: destination row 1 */
    VLD1.8   {d1[6]},[r0],r10               /* left rows 0..3 into d1 bytes 6..3 */
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[4]},[r0]
    VLD1.8   {d1[3]},[r9]
    VEXT.8   d3,d1,d0,#3
    ADD      r4,r1,r5                       /* r4 reused: destination row 2 */
    VEXT.8   d4,d1,d0,#4
    ADD      r6,r4,r5                       /* r6 reused: destination row 3 */
    VEXT.8   d5,d1,d0,#5
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                      /* d6 = (d3 + 2*d4 + d5 + 2) >> 2 */
    VST1.32  {d6[0]},[r6]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r4]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r1]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3]
    B        .Lreturn_ok

/* Mode 5 (VR). Inputs: four above pixels, the above-left pixel and the
 * first three left pixels. The lane shuffling below is kept exactly as in
 * the original. */
OMX_VC_4x4_VR:
    VLD1.32  {d0[0]},[r1]                   /* d0 bytes 0..3 = above pixels */
    VLD1.8   {d0[7]},[r2]                   /* d0 byte 7 = above-left pixel */
    VLD1.8   {d1[7]},[r0],r4                /* left row 0 */
    VLD1.8   {d2[7]},[r0],r4                /* left row 1 */
    VLD1.8   {d1[6]},[r0]                   /* left row 2 */
    VEXT.8   d12,d0,d0,#7
    VEXT.8   d3,d1,d12,#6
    VEXT.8   d4,d2,d12,#7
    VEXT.8   d5,d1,d0,#7
    VEXT.8   d6,d2,d0,#7
    VEXT.8   d11,d1,d12,#7
    VHADD.U8 d8,d6,d12
    VRHADD.U8 d8,d8,d11                     /* d8 = (d6 + 2*d11 + d12 + 2) >> 2 */
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                      /* d7 = (d3 + 2*d4 + d5 + 2) >> 2 */
    VEXT.8   d10,d8,d8,#1
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VEXT.8   d9,d7,d7,#1
    VST1.32  {d10[0]},[r3],r12              /* row 0 */
    VST1.32  {d9[0]},[r11],r12              /* row 1 */
    VST1.32  {d8[0]},[r3],r12               /* row 2 */
    VST1.32  {d7[0]},[r11]                  /* row 3 */
    B        .Lreturn_ok

/* Mode 6 (HD). Inputs: the first three above pixels, the above-left pixel
 * and the four left pixels. */
OMX_VC_4x4_HD:
    /* Only d0 bytes 0..2 feed the windows below, so a 4-byte load is
     * enough. The original loaded 8 bytes, reading past the four above
     * pixels although this mode never uses them. */
    VLD1.32  {d0[0]},[r1]
    VLD1.8   {d1[7]},[r2]                   /* d1 byte 7 = above-left pixel */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[6]},[r0],r10               /* left rows 0..3 into d1 bytes 6..3 */
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[4]},[r0]
    VLD1.8   {d1[3]},[r9]
    VEXT.8   d3,d1,d0,#3
    VEXT.8   d4,d1,d0,#2
    VEXT.8   d5,d1,d0,#1
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                      /* d7 = (d3 + 2*d4 + d5 + 2) >> 2 */
    VRHADD.U8 d8,d4,d3                      /* d8 = (d3 + d4 + 1) >> 1 */
    VSHL.I64 d8,d8,#24
    VSHL.I64 d6,d7,#16
    VZIP.8   d8,d6                          /* interleave averaged and filtered samples */
    VEXT.8   d7,d7,d7,#6
    VEXT.8   d8,d6,d7,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VST1.32  {d8[1]},[r3],r12               /* row 0 */
    VST1.32  {d6[1]},[r11],r12              /* row 1 */
    VST1.32  {d8[0]},[r3]                   /* row 2 */
    VST1.32  {d6[0]},[r11]                  /* row 3 */
    B        .Lreturn_ok

/* Mode 7 (VL). Inputs: the above row, 8 bytes when flag 0x40 is set,
 * otherwise 4 bytes padded with the last one. */
OMX_VC_4x4_VL:
    TST      r7,#0x40
    BEQ      .Lvl_no_upper_right
    VLD1.8   {d3},[r1]
    VEXT.8   d4,d3,d3,#1
    VEXT.8   d5,d4,d4,#1
    B        .Lvl_filter
.Lvl_no_upper_right:
    VLD1.32  {d0[1]},[r1]                   /* four above pixels into d0 bytes 4..7 */
    VDUP.8   d2,d0[7]
    VEXT.8   d3,d0,d2,#4
    VEXT.8   d4,d0,d2,#5
    VEXT.8   d5,d0,d2,#6
.Lvl_filter:
    VRHADD.U8 d7,d4,d3                      /* d7  = (d3 + d4 + 1) >> 1 */
    VHADD.U8 d10,d3,d5
    VRHADD.U8 d10,d10,d4                    /* d10 = (d3 + 2*d4 + d5 + 2) >> 2 */
    VEXT.8   d8,d7,d7,#1
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VEXT.8   d9,d10,d8,#1
    VST1.32  {d7[0]},[r3],r12               /* row 0 */
    VST1.32  {d10[0]},[r11],r12             /* row 1 */
    VST1.32  {d8[0]},[r3]                   /* row 2 */
    VST1.32  {d9[0]},[r11]                  /* row 3 */
    B        .Lreturn_ok

/* Mode 8 (HU). Inputs: the four left pixels, padded with the last one. */
OMX_VC_4x4_HU:
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[4]},[r0],r10               /* left rows 0..3 into d1 bytes 4..7 */
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[6]},[r0]
    VLD1.8   {d1[7]},[r9]
    VDUP.8   d2,d1[7]                       /* pad with the bottom-left pixel */
    VEXT.8   d3,d1,d2,#4
    VEXT.8   d4,d1,d2,#5
    VEXT.8   d5,d1,d2,#6
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                      /* d7 = (d3 + 2*d4 + d5 + 2) >> 2 */
    VRHADD.U8 d8,d4,d3                      /* d8 = (d3 + d4 + 1) >> 1 */
    VZIP.8   d8,d7                          /* d8 = avg0,flt0,avg1,flt1,...; d7 = upper half */
    VST1.32  {d8[0]},[r3],r5                /* row 0 */
    VEXT.8   d8,d8,d8,#2                    /* advance by one avg/flt pair per row */
    VST1.32  {d8[0]},[r3],r5                /* row 1 */
    VEXT.8   d8,d8,d8,#2
    VST1.32  {d8[0]},[r3],r5                /* row 2 */
    VST1.32  {d7[0]},[r3]                   /* row 3: all lanes come from the padding pixel */

/* Common exit for every mode: return 0 and restore saved registers. */
.Lreturn_ok:
    MOV      r0,#0
    VPOP     {d8-d12}
    POP      {r4-r12,pc}
    .size    omxVCM4P10_PredictIntra_4x4, .-omxVCM4P10_PredictIntra_4x4

    .end
