/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/* Assembler set-up: build attributes, instruction set, section and alignment. */
    .eabi_attribute 24, 1       /* Tag_ABI_align_needed: 8-byte stack alignment is required */
    .eabi_attribute 25, 1       /* Tag_ABI_align_preserved: 8-byte stack alignment is kept */

    .arm                        /* everything below is A32 (ARM state) code */
    .fpu neon                   /* allow Advanced SIMD mnemonics */
    .text
    .align 4                    /* on ARM targets the operand is a power of two: 16 bytes */

/*
 * Jump table for omxVCM4P10_PredictIntraChroma_8x8, indexed by the mode
 * value the function loads from its stack arguments (word index, so the
 * function scales it by 4).
 *
 * Each entry is the distance from the value the PC reads as at label P0
 * (P0 + 8 in ARM state) to a handler, so "ADD pc,<entry>" executed at P0
 * lands on that handler without needing absolute addresses.
 *
 * Entry order: 0 = DC, 1 = horizontal, 2 = vertical, 3 = plane.
 */
armVCM4P10_pIndexTable8x8:
    .word  OMX_VC_CHROMA_DC-(P0+8),    OMX_VC_CHROMA_HOR-(P0+8)
    .word  OMX_VC_CHROMA_VERT-(P0+8),  OMX_VC_CHROMA_PLANE-(P0+8)

/*
 * Signed 16-bit multipliers read by the plane handler of
 * omxVCM4P10_PredictIntraChroma_8x8.
 *
 *   halfwords 0-3  {3,2,1,4}            loaded as one D register; weights
 *                                       applied to the four neighbour
 *                                       differences that form each gradient
 *   halfwords 4-11 {-3,-2,-1,0,1,2,3,4} loaded as one Q register; the
 *                                       per-column and per-row position
 *                                       offsets multiplied by the slopes
 */
armVCM4P10_MultiplierTableChroma8x8:
    .hword   3, 2, 1,4
    .hword  -3,-2,-1,0
    .hword   1, 2, 3,4

/*
 * omxVCM4P10_PredictIntraChroma_8x8
 *
 * Fills an 8x8 block of bytes at the destination with a prediction built
 * from neighbouring samples. The mode index read from the stack selects
 * one of four handlers through armVCM4P10_pIndexTable8x8:
 *   0  OMX_VC_CHROMA_DC     quadrant averages, or the constant 0x80
 *   1  OMX_VC_CHROMA_HOR    every row is its left neighbour replicated
 *   2  OMX_VC_CHROMA_VERT   every row is a copy of the 8 bytes above
 *   3  OMX_VC_CHROMA_PLANE  sat8((a + b*(x-3) + c*(y-3) + 16) >> 5)
 *
 * Inputs as the code uses them (ARM state; arguments 5-8 on the stack,
 * offsets below are relative to sp at entry):
 *   r0        left neighbour column; one byte is read every leftStep bytes
 *   r1        the 8 contiguous bytes above the block
 *   r2        the above-left corner byte (read in plane mode only)
 *   r3        destination; one 8-byte row is written every dstStep bytes
 *   [sp,#0]   leftStep
 *   [sp,#4]   dstStep
 *   [sp,#8]   mode index; it is NOT range checked and must be 0..3
 *   [sp,#12]  availability flags, consulted in DC mode only:
 *             bit 0 gates the read through r1, bit 1 the reads through r0
 * NOTE(review): the argument names follow the OpenMAX DL prototype of this
 * function; confirm against the project header.
 *
 * Returns r0 = 0 on every path.
 * Preserves r4-r10 and d8-d15 (saved on entry). Overwrites r0-r3, the
 * condition flags (DC mode), d0-d7 and, in plane mode, d16-d19.
 */

/* Bytes pushed by the prologue: 8 core registers plus d8-d15. */
    .equ     PIC8_SAVED_BYTES,   (8*4 + 8*8)
/* Stack arguments, addressed after the prologue has moved sp. */
    .equ     PIC8_ARG_LEFTSTEP,  (PIC8_SAVED_BYTES + 0)
    .equ     PIC8_ARG_DSTSTEP,   (PIC8_SAVED_BYTES + 4)
    .equ     PIC8_ARG_PREDMODE,  (PIC8_SAVED_BYTES + 8)
    .equ     PIC8_ARG_AVAIL,     (PIC8_SAVED_BYTES + 12)
/* Availability flag bits tested by the DC handler. */
    .equ     PIC8_AVAIL_ABOVE,   1
    .equ     PIC8_AVAIL_LEFT,    2

    .global  omxVCM4P10_PredictIntraChroma_8x8
    /* .func only feeds debug info; .type is what marks the symbol as code
       so that linkers can apply ARM/Thumb interworking to calls. */
    .type    omxVCM4P10_PredictIntraChroma_8x8, %function
    .func    omxVCM4P10_PredictIntraChroma_8x8
omxVCM4P10_PredictIntraChroma_8x8:
    PUSH     {r4-r10,lr}
    VPUSH    {d8-d15}                       /* plane mode uses q4-q7 */
    ADR      r8, armVCM4P10_pIndexTable8x8
    LDR      r6,[sp,#PIC8_ARG_PREDMODE]     /* r6 = mode index */
    LDR      r4,[sp,#PIC8_ARG_LEFTSTEP]     /* r4 = leftStep */
    LDR      r5,[sp,#PIC8_ARG_DSTSTEP]      /* r5 = dstStep */
    LDR      r7,[sp,#PIC8_ARG_AVAIL]        /* r7 = availability flags */
    LDR      r8,[r8,r6,LSL #2]              /* r8 = handler - (P0+8) */
P0: ADD      pc,r8                          /* pc reads as P0+8: jump to handler */

/* ---- mode 0: DC ------------------------------------------------------- */
OMX_VC_CHROMA_DC:
    TST      r7,#PIC8_AVAIL_LEFT
    BEQ      .Ldc_no_left
    /* Gather the 8 left-column bytes into d1. r0 walks the even rows and
       r9 the odd rows, both advancing by two rows per load. */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[0]},[r0],r10
    VLD1.8   {d1[1]},[r9],r10
    VLD1.8   {d1[2]},[r0],r10
    VLD1.8   {d1[3]},[r9],r10
    VLD1.8   {d1[4]},[r0],r10
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[6]},[r0],r10
    VLD1.8   {d1[7]},[r9]
    TST      r7,#PIC8_AVAIL_ABOVE
    BEQ      .Ldc_left_only
    /* Both neighbours present. */
    VLD1.8   {d0},[r1]                      /* d0 = above[0..7] */
    MOV      r0,#0                          /* return value; r0 is free now */
    VPADDL.U8 d2,d0
    VPADDL.U16 d3,d2                        /* d3 = {sum above[0..3], sum above[4..7]} */
    VPADDL.U8 d2,d1
    VPADDL.U16 d1,d2                        /* d1 = {sum left[0..3],  sum left[4..7]}  */
    VADD.I32 d2,d3,d1                       /* d2 = above half + matching left half */
    VRSHR.U32 d2,d2,#3                      /* rounded mean of 8 samples */
    VRSHR.U32 d3,d3,#2                      /* rounded mean of 4 samples */
    VRSHR.U32 d1,d1,#2                      /* rounded mean of 4 samples */
    /* Build byte-index vectors for VTBL. */
    VMOV.I8  d5,#0xc
    VMOV.I8  d6,#0x4
    VSHL.I64 d5,d5,#32                      /* d5 = {0,0,0,0,12,12,12,12} */
    VSHR.U64 d6,d6,#32                      /* d6 = {4,4,4,4,0,0,0,0}     */
    VADD.I8  d6,d6,d5                       /* d6 = {4,4,4,4,12,12,12,12} */
    VTBL.8   d0,{d2-d3},d5                  /* rows 0-3: 4 x d2 byte 0, 4 x d3 byte 4 */
    VTBL.8   d4,{d1-d2},d6                  /* rows 4-7: 4 x d1 byte 4, 4 x d2 byte 4 */
.Ldc_store_two_halves:
    /* Rows 0-3 take d0, rows 4-7 take d4. r3 = even rows, r9 = odd rows. */
    ADD      r9,r3,r5
    ADD      r10,r5,r5
    VST1.8   {d0},[r3],r10
    VST1.8   {d0},[r9],r10
    VST1.8   {d0},[r3],r10
    VST1.8   {d0},[r9],r10
    VST1.8   {d4},[r3],r10
    VST1.8   {d4},[r9],r10
    VST1.8   {d4},[r3],r10
    VST1.8   {d4},[r9]
    VPOP     {d8-d15}
    POP      {r4-r10,pc}
.Ldc_left_only:
    /* Only the left column: each half of the block is the mean of its own
       four left samples. */
    MOV      r0,#0
    VPADDL.U8 d2,d1
    VPADDL.U16 d1,d2                        /* d1 = {sum left[0..3], sum left[4..7]} */
    VRSHR.U32 d1,d1,#2
    VDUP.8   d0,d1[0]                       /* rows 0-3 */
    VDUP.8   d4,d1[4]                       /* rows 4-7 */
    B        .Ldc_store_two_halves
.Ldc_no_left:
    TST      r7,#PIC8_AVAIL_ABOVE
    BEQ      .Ldc_no_neighbours
    /* Only the row above: left and right halves of every row are the
       means of above[0..3] and above[4..7]. */
    VLD1.8   {d0},[r1]
    MOV      r0,#0
    VPADDL.U8 d2,d0
    VPADDL.U16 d3,d2                        /* d3 = {sum above[0..3], sum above[4..7]} */
    VRSHR.U32 d3,d3,#2
    VMOV.I8  d5,#0x4
    VSHL.I64 d5,d5,#32                      /* d5 = {0,0,0,0,4,4,4,4} */
    VTBL.8   d0,{d3},d5                     /* 4 x d3 byte 0, 4 x d3 byte 4 */
    B        .Lstore_d0_all_rows
.Ldc_no_neighbours:
    VMOV.I8  d0,#0x80                       /* neither flag set: constant fill */
    MOV      r0,#0
.Lstore_d0_all_rows:
    /* Write d0 to all eight rows. Shared by the DC and vertical handlers;
       r0 already holds the return value here. */
    ADD      r9,r3,r5
    ADD      r10,r5,r5
    VST1.8   {d0},[r3],r10
    VST1.8   {d0},[r9],r10
    VST1.8   {d0},[r3],r10
    VST1.8   {d0},[r9],r10
    VST1.8   {d0},[r3],r10
    VST1.8   {d0},[r9],r10
    VST1.8   {d0},[r3],r10
    VST1.8   {d0},[r9]
    VPOP     {d8-d15}
    POP      {r4-r10,pc}

/* ---- mode 2: vertical ------------------------------------------------- */
OMX_VC_CHROMA_VERT:
    VLD1.8   {d0},[r1]                      /* the row above, copied to every row */
    MOV      r0,#0
    B        .Lstore_d0_all_rows

/* ---- mode 1: horizontal ----------------------------------------------- */
OMX_VC_CHROMA_HOR:
    /* Load-and-replicate one left byte per row into d0..d7. */
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d0[]},[r0],r10
    VLD1.8   {d1[]},[r9],r10
    VLD1.8   {d2[]},[r0],r10
    VLD1.8   {d3[]},[r9],r10
    VLD1.8   {d4[]},[r0],r10
    VLD1.8   {d5[]},[r9],r10
    VLD1.8   {d6[]},[r0],r10
    VLD1.8   {d7[]},[r9]
    B        .Lstore_d0_to_d7

/* ---- mode 3: plane ---------------------------------------------------- */
OMX_VC_CHROMA_PLANE:
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d0},[r1]                      /* d0 = above[0..7] */
    VLD1.8   {d2[0]},[r2]                   /* d2 byte 0 = corner; other lanes unused */
    VLD1.8   {d1[0]},[r0],r10               /* d1 = left[0..7] */
    VLD1.8   {d1[1]},[r9],r10
    VLD1.8   {d1[2]},[r0],r10
    VLD1.8   {d1[3]},[r9],r10
    VLD1.8   {d1[4]},[r0],r10
    VLD1.8   {d1[5]},[r9],r10
    VLD1.8   {d1[6]},[r0],r10
    VLD1.8   {d1[7]},[r9]
    /* Neighbour differences. Only lane 0 of q3/q7 and lanes 0-2 of q2/q6
       are consumed below. */
    VREV64.8 d3,d0                          /* d3 = above[7..0] */
    VSUBL.U8 q3,d3,d2                       /* q3[0] = above[7] - corner */
    VSHR.U64 d3,d3,#8                       /* d3 = above[6..0], 0 */
    VSUBL.U8 q2,d3,d0                       /* q2[i] = above[6-i] - above[i] */
    VREV64.8 d3,d1                          /* d3 = left[7..0] */
    VSUBL.U8 q7,d3,d2                       /* q7[0] = left[7] - corner */
    VSHR.U64 d3,d3,#8                       /* d3 = left[6..0], 0 */
    VSUBL.U8 q6,d3,d1                       /* q6[i] = left[6-i] - left[i] */
    ADR      r2, armVCM4P10_MultiplierTableChroma8x8   /* corner pointer no longer needed */
    VSHL.I64 d4,d4,#16                      /* d4 = {0, q2[0], q2[1], q2[2]} */
    VEXT.8   d9,d4,d6,#2                    /* d9 = {q2[0], q2[1], q2[2], q3[0]} */
    VLD1.16  {d10},[r2]!                    /* d10 = {3,2,1,4}; r2 -> offset row */
    VSHL.I64 d12,d12,#16                    /* d12 = {0, q6[0], q6[1], q6[2]} */
    VEXT.8   d16,d12,d14,#2                 /* d16 = {q6[0], q6[1], q6[2], q7[0]} */
    VMUL.I16 d11,d9,d10                     /* weighted horizontal differences */
    VMUL.I16 d3,d16,d10                     /* weighted vertical differences */
    VPADD.I16 d3,d11,d3
    VPADDL.S16 d3,d3                        /* d3 = {H, V} as 32-bit sums */
    VSHL.I32 d2,d3,#4
    VADD.I32 d3,d3,d2                       /* d3 = {17*H, 17*V} */
    VLD1.16  {d10,d11},[r2]                 /* q5 = {-3,-2,-1,0,1,2,3,4} */
    VRSHR.S32 d3,d3,#5                      /* b = (17*H+16)>>5, c = (17*V+16)>>5 */
    VADDL.U8 q0,d0,d1                       /* q0[i] = above[i] + left[i] */
    VDUP.16  q0,d1[3]                       /* broadcast q0[7] = above[7] + left[7] */
    VSHL.I16 q0,q0,#4                       /* a = 16 * (above[7] + left[7]) */
    VDUP.16  q2,d3[0]                       /* b in every lane */
    VDUP.16  q3,d3[2]                       /* c in every lane */
    VMUL.I16 q2,q2,q5                       /* q2[x] = b * (x-3) */
    VMUL.I16 q3,q3,q5                       /* q3[y] = c * (y-3) */
    VADD.I16 q2,q2,q0                       /* q2[x] = a + b * (x-3) */
    /* Broadcast the row term c*(y-3) for y = 0..7. q5 is reused as a
       destination only after its last read above. */
    VDUP.16  q0,d6[0]
    VDUP.16  q1,d6[1]
    VDUP.16  q4,d6[2]
    VDUP.16  q5,d6[3]
    VDUP.16  q6,d7[0]
    VDUP.16  q7,d7[1]
    VDUP.16  q8,d7[2]
    VDUP.16  q9,d7[3]
    /* row[y][x] = a + b*(x-3) + c*(y-3) */
    VADD.I16 q0,q2,q0
    VADD.I16 q1,q2,q1
    VADD.I16 q4,q2,q4
    VADD.I16 q5,q2,q5
    VADD.I16 q6,q2,q6
    VADD.I16 q7,q2,q7
    VADD.I16 q8,q2,q8
    VADD.I16 q9,q2,q9
    /* (value + 16) >> 5, saturated to an unsigned byte; one D register per row. */
    VQRSHRUN.S16 d0,q0,#5
    VQRSHRUN.S16 d1,q1,#5
    VQRSHRUN.S16 d2,q4,#5
    VQRSHRUN.S16 d3,q5,#5
    VQRSHRUN.S16 d4,q6,#5
    VQRSHRUN.S16 d5,q7,#5
    VQRSHRUN.S16 d6,q8,#5
    VQRSHRUN.S16 d7,q9,#5
.Lstore_d0_to_d7:
    /* Row y is written from d<y>. Shared by the horizontal and plane
       handlers; r3 = even rows, r9 = odd rows. */
    ADD      r9,r3,r5
    ADD      r10,r5,r5
    VST1.8   {d0},[r3],r10
    VST1.8   {d1},[r9],r10
    VST1.8   {d2},[r3],r10
    VST1.8   {d3},[r9],r10
    VST1.8   {d4},[r3],r10
    VST1.8   {d5},[r9],r10
    VST1.8   {d6},[r3],r10
    VST1.8   {d7},[r9]
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r10,pc}
    .endfunc
    .size    omxVCM4P10_PredictIntraChroma_8x8, .-omxVCM4P10_PredictIntraChroma_8x8

    .end
