1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20
21@/**
22@******************************************************************************
23@*
24@* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC)
25@*                and do the prediction.
26@*
27@* @par Description
28@*   This function evaluates the first three intra chroma modes, computes the corresponding SADs
29@*   and returns the buffer predicted with the best mode.
30@*
31@* @param[in] pu1_src
32@*  UWORD8 pointer to the source
33@*
34@* @param[in] pu1_ngbr_pels
35@*  UWORD8 pointer to neighbouring pels
36@*
37@* @param[out] pu1_dst
38@*  UWORD8 pointer to the destination
39@*
40@* @param[in] src_strd
41@*  integer source stride
42@*
43@* @param[in] dst_strd
44@*  integer destination stride
45@*
46@* @param[in] u4_n_avblty
47@* availability of neighbouring pixels
48@*
49@* @param[out] u4_intra_mode
50@* Pointer to the variable in which the best mode is returned
51@*
52@* @param[out] pu4_sadmin
53@* Pointer to the variable in which the minimum SAD is returned
54@*
55@* @param[in] u4_valid_intra_modes
56@* Says what all modes are valid
57@*
58@*
59@* @return      none
60@*
61@******************************************************************************
62@*/
63@
64@void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
65@                                      UWORD8 *pu1_ngbr_pels_i16,
66@                                      UWORD8 *pu1_dst,
67@                                      UWORD32 src_strd,
68@                                      UWORD32 dst_strd,
69@                                      WORD32 u4_n_avblty,
70@                                      UWORD32 *u4_intra_mode,
71@                                      WORD32 *pu4_sadmin,
72@                                      UWORD32 u4_valid_intra_modes)
73@
74.text
75.p2align 2
76
77    .global ih264e_evaluate_intra_chroma_modes_a9q
78
79ih264e_evaluate_intra_chroma_modes_a9q:
80
@ Overview (derived from the code below):
@   * Reads 8 rows of 16 bytes from pu1_src (row pitch src_strd).
@   * Computes three SADs against the neighbour-derived VERT, HORZ and DC
@     predictions, forces the SAD of every mode whose bit is clear in
@     u4_valid_intra_modes to 1 << 30, and picks the smallest one.
@   * Writes the winning SAD to *pu4_sadmin, the winning mode number
@     (0 = DC, 1 = HORZ, 2 = VERT) to *u4_intra_mode and the 8 x 16 byte
@     prediction to pu1_dst (row pitch dst_strd).
@ Neighbour buffer layout as used here: bytes 0..15 are the left pels
@ (halfword 7 belongs to row 0, halfword 0 to row 7), bytes 16..17 are
@ skipped, bytes 18..33 are the top row.
@ NOTE(review): even and odd bytes are summed separately for the DC value,
@ presumably because the samples are interleaved Cb/Cr - confirm with caller.
@
@ Register usage (r0-r3 hold arguments on entry; r4-r7 are NOT live on entry,
@ they are loaded from the caller's stack area at the points noted):
81@r0 = pu1_src,
82@r1 = pu1_ngbr_pels_i16,
83@r2 = pu1_dst,
84@r3 = src_strd,
85@r4 = dst_strd,       loaded from [sp, #104] before the mode decision
86@r5 = u4_n_avblty,    loaded from [sp, #44] right after the core register push
87@r6 = u4_intra_mode,  loaded from [sp, #112] before the mode decision
88@r7 = pu4_sadmin      loaded from [sp, #116] before the mode decision
89
90
91
92    stmfd         sp!, {r4-r12, r14}    @save 10 core registers (40 bytes) on the stack
93    @-----------------------
94    ldr           r5, [sp, #44]         @r5 = u4_n_avblty (40 saved bytes + 4 = 2nd stack word of the caller)
95    @-------------------------
96    mov           r12, r1               @ r12 = pu1_ngbr_pels, kept for the reload at 'sad' (r1 is advanced below)
97    vpush         {d8-d15}              @ save d8-d15 (64 bytes); later sp offsets include these 64 bytes
98    vld1.32       {q4}, [r1]!           @ q4 = neighbour bytes 0..15 (left pels), r1 += 16
99    add           r1, r1, #2            @ skip neighbour bytes 16..17
100    vld1.32       {q5}, [r1]!           @ q5 = neighbour bytes 18..33 (top row)
101
102    vuzp.u8       q4, q5                @ d8/d9 = even bytes of left/top, d10/d11 = odd bytes of left/top
103
    @ Each vpaddl/vpadd pair below reduces 8 bytes to two 16-bit sums of
    @ 4 consecutive bytes, laid out as {s0, s1, s0, s1}.
104    vpaddl.u8     d8, d8                @ even bytes of left: pairwise byte sums
105    vpadd.u16     d8, d8                @ even bytes of left: sums of 4
106
107    vpaddl.u8     d9, d9                @ even bytes of top: pairwise byte sums
108    vpadd.u16     d9, d9                @ even bytes of top: sums of 4
109
110    vpaddl.u8     d10, d10              @ odd bytes of left: pairwise byte sums
111    vpadd.u16     d10, d10              @ odd bytes of left: sums of 4
112
113    vpaddl.u8     d11, d11              @ odd bytes of top: pairwise byte sums
114
115    and           r7, r5, #5            @ keep only bits 0 and 2 of u4_n_avblty
116    vpadd.u16     d11, d11              @ odd bytes of top: sums of 4
117    subs          r8, r7, #5            @ bits 0 and 2 both set?
118    beq           all_available
119    subs          r8, r7, #4            @ only bit 2 set?
120    beq           top_available
121    subs          r8, r7, #1            @ only bit 0 set?
122    beq           left_available
123    mov           r10, #128             @ neither bit set: DC prediction is the constant 128
124    vdup.8        q14, r10              @ q14 = DC prediction for rows 0..3
125    vdup.8        q15, r10              @ q15 = DC prediction for rows 4..7
126    b             sad
127
    @ In the three branches below the DC prediction is built per quadrant:
    @ d28 = rows 0..3 bytes 0..7,  d29 = rows 0..3 bytes 8..15,
    @ d30 = rows 4..7 bytes 0..7,  d31 = rows 4..7 bytes 8..15.
    @ After vzip.u16, q4 holds the left sums and q5 the top sums, each as
    @ (even-byte sum, odd-byte sum) halfword pairs; halfword pair 0 covers
    @ neighbour bytes 0..7 and pair 1 covers bytes 8..15 of the respective run.
128all_available:
129    vzip.u16      q4, q5                @ q4 = left sums, q5 = top sums
130    vext.16       q6, q4, q4, #2        @ q6 = left sums with the two halfword pairs swapped
131    vadd.u16      q7, q5, q6            @ pair 0 = top[0..7] + left[8..15], pair 1 = top[8..15] + left[0..7]
132    vqrshrn.u16   d14, q7, #3           @ rounded /8 (8 samples each), narrowed to bytes
133    vqrshrn.u16   d15, q4, #2           @ left only: rounded /4
134    vqrshrn.u16   d16, q5, #2           @ top only: rounded /4
135    vdup.16       d28, d14[0]           @ rows 0..3, bytes 0..7 : top[0..7] + left rows 0..3
136    vdup.16       d29, d16[1]           @ rows 0..3, bytes 8..15: top[8..15] only
137    vdup.16       d30, d15[0]           @ rows 4..7, bytes 0..7 : left rows 4..7 only
138    vdup.16       d31, d14[1]           @ rows 4..7, bytes 8..15: top[8..15] + left rows 4..7
139    b             sad
140top_available:
141    vzip.u16      q4, q5                @ q5 = top sums
142    vqrshrn.u16   d16, q5, #2           @ rounded /4, narrowed to bytes
143    vdup.16       d28, d16[0]           @ rows 0..3, bytes 0..7 : top[0..7]
144    vdup.16       d29, d16[1]           @ rows 0..3, bytes 8..15: top[8..15]
145    vdup.16       d30, d16[0]           @ rows 4..7, bytes 0..7 : top[0..7]
146    vdup.16       d31, d16[1]           @ rows 4..7, bytes 8..15: top[8..15]
147    b             sad
148left_available:
149    vzip.u16      q4, q5                @ q4 = left sums
150    vqrshrn.u16   d16, q4, #2           @ rounded /4, narrowed to bytes
151    vdup.16       d28, d16[3]           @ rows 0..3: left bytes 8..15 (rows 0..3)
152    vdup.16       d29, d16[3]
153    vdup.16       d30, d16[2]           @ rows 4..7: left bytes 0..7 (rows 4..7)
154    vdup.16       d31, d16[2]
155
156
    @ SAD accumulation. Two source rows are processed per step: row i through
    @ r0 and row i+4 through r12. 16-bit accumulators:
    @   q8/q9 = VERT, q13/q7 = HORZ, q11/q12 = DC.
    @ r8 / r10 walk backwards through the left pels for rows i / i+4.
157sad:
158    vld1.32       {q4}, [r12]!          @ q4 = left pels again (neighbour bytes 0..15)
159    sub           r8, r12, #2           @ r8 = &neighbour[14]: left pel pair of row 0
160    add           r12, r12, #2          @ skip neighbour bytes 16..17
161    vld1.32       {q5}, [r12]!          @ q5 = top row = vertical prediction for every row
162    add           r12, r0, r3, lsl  #2  @ r12 = pu1_src + 4 * src_strd (row 4)
163    sub           r10, r8, #8           @ r10 = &neighbour[6]: left pel pair of row 4
164    vld1.32       {q0}, [r0], r3        @ q0 = source row 0
165    ldrh          r9, [r8]
166    vdup.16       q10, r9               @ row 0
167
168    @/vertical row 0;
169    vabdl.u8      q8, d0, d10           @ vabdl initialises the accumulator
170    vabdl.u8      q9, d1, d11
171    sub           r8, r8, #2            @ r8 -> left pel pair of row 1
172    vld1.32       {q1}, [r12], r3       @ q1 = source row 4
173
174    @/HORZ row 0;
175    vabdl.u8      q13, d0, d20
176    vabdl.u8      q7, d1, d21
177    ldrh          r9, [r10]
178    @/dc row 0;
179    vabdl.u8      q11, d0, d28
180    vabdl.u8      q12, d1, d29
181
182
183    vdup.16       q10, r9               @ row 4
184    @/vertical row 4;
185    vabal.u8      q8, d2, d10
186    vabal.u8      q9, d3, d11
187    sub           r10, r10, #2          @ r10 -> left pel pair of row 5
188
189    @/HORZ row 4;
190    vabal.u8      q13, d2, d20
191    vabal.u8      q7, d3, d21
192    @/dc row 4;
193    vabal.u8      q11, d2, d30
194    vabal.u8      q12, d3, d31
195
196    mov           r11, #3               @ three more row pairs: (1,5), (2,6), (3,7)
197
198loop:
199    vld1.32       {q0}, [r0], r3        @ q0 = source row i
200    ldrh          r9, [r8]
201
202
203    @/vertical row i;
204    vabal.u8      q8, d0, d10
205    vabal.u8      q9, d1, d11
206
207    vdup.16       q10, r9               @ row i
208    vld1.32       {q1}, [r12], r3       @ q1 = source row i+4
209    sub           r8, r8, #2
210    @/HORZ row i;
211    vabal.u8      q13, d0, d20
212    vabal.u8      q7, d1, d21
213    ldrh          r9, [r10]
214    @/dc row i;
215    vabal.u8      q11, d0, d28
216    vabal.u8      q12, d1, d29
217    sub           r10, r10, #2
218
219    vdup.16       q10, r9               @ row i+4
220    @/vertical row i+4;
221    vabal.u8      q8, d2, d10
222    vabal.u8      q9, d3, d11
223    subs          r11, r11, #1          @ flags are consumed by the bne below (NEON ops do not alter them)
224
225    @/HORZ row i+4;
226    vabal.u8      q13, d2, d20
227    vabal.u8      q7, d3, d21
228    @/dc row i+4;
229    vabal.u8      q11, d2, d30
230    vabal.u8      q12, d3, d31
231    bne           loop
232
233
234
235@-------------------------------------------
@ Horizontal reduction of each accumulator pair to one 32-bit SAD.
236
237    vadd.i16      q9, q9, q8            @/VERT
238    vadd.i16      q7, q13, q7           @/HORZ
239    vadd.i16      q12, q11, q12         @/DC
240    vadd.i16      d18, d19, d18         @/VERT
241    vadd.i16      d14, d15, d14         @/HORZ
242    vadd.i16      d24, d24, d25         @/DC
243    vpaddl.u16    d18, d18              @/VERT
244    vpaddl.u16    d14, d14              @/HORZ
245    vpaddl.u16    d24, d24              @/DC
246    vpaddl.u32    d18, d18              @/VERT
247    vpaddl.u32    d14, d14              @/HORZ
248    vpaddl.u32    d24, d24              @/DC
249
250
251
252    vmov.u32      r8, d18[0]            @ vert
253    vmov.u32      r9, d14[0]            @horz
254    vmov.u32      r10, d24[0]           @dc
255
256    mov           r11, #1
257@-----------------------
258    ldr           r0, [sp, #120]        @ u4_valid_intra_modes (40 + 64 saved bytes + 16)
259@--------------------------------------------
260
261
262    lsl           r11 , #30             @ r11 = 1 << 30: SAD substituted for a disallowed mode
263
264    ands          r7, r0, #04           @ vert mode valid? (bit 2)
265    moveq         r8, r11
266
267    ands          r6, r0, #02           @ horz mode valid? (bit 1)
268    moveq         r9, r11
269
270    ands          r6, r0, #01           @ dc mode valid? (bit 0)
271    moveq         r10, r11
272
273
274    @---------------------------
275    ldr           r4, [sp, #104]        @r4 = dst_strd,
276    ldr           r6, [sp, #112]        @ R6 =MODE
277    ldr           r7, [sp, #116]        @r7 = pu4_sadmin
278
279    @--------------------------
280
    @ Mode decision (signed compares). DC wins ties against HORZ and VERT,
    @ HORZ wins a tie against VERT. If no mode bit is set, all three values
    @ are 1 << 30 and the DC path is taken.
281    cmp           r10, r9
282    bgt           not_dc                @ dc > horz
283    cmp           r10, r8
284    bgt           do_vert               @ dc <= horz but dc > vert
285
286    @/----------------------
287    @DO DC PREDICTION
288    str           r10 , [r7]            @MIN SAD
289    mov           r10, #0               @ mode 0 = DC
290    str           r10 , [r6]            @ MODE
291    b             do_dc_vert            @ q14/q15 already hold the DC prediction
292    @-----------------------------
293
294not_dc:
295    cmp           r9, r8
296    bgt           do_vert               @ horz > vert
297    @/----------------------
298    @DO HORIZONTAL
    @ q4 still holds the left pels; halfword 7 (d9[3]) is row 0 and
    @ halfword 0 (d8[0]) is row 7. Each row is that halfword replicated
    @ across 16 bytes.
299
300    vdup.16       q10, d9[3]            @/HORIZONTAL VALUE ROW=0;
301    str           r9 , [r7]             @MIN SAD
302    mov           r9, #1                @ mode 1 = HORZ
303    vdup.16       q11, d9[2]            @/HORIZONTAL VALUE ROW=1;
304    str           r9 , [r6]             @ MODE
305    vdup.16       q12, d9[1]            @/HORIZONTAL VALUE ROW=2;
306    vst1.32       {d20, d21} , [r2], r4 @0
307    vdup.16       q13, d9[0]            @/HORIZONTAL VALUE ROW=3;
308    vst1.32       {d22, d23} , [r2], r4 @1
309    vdup.16       q14, d8[3]            @/HORIZONTAL VALUE ROW=4;
310    vst1.32       {d24, d25} , [r2], r4 @2
311    vdup.16       q15, d8[2]            @/HORIZONTAL VALUE ROW=5;
312    vst1.32       {d26, d27} , [r2], r4 @3
313    vdup.16       q1, d8[1]             @/HORIZONTAL VALUE ROW=6;
314    vst1.32       {d28, d29} , [r2], r4 @4
315    vdup.16       q2, d8[0]             @/HORIZONTAL VALUE ROW=7;
316    vst1.32       {d30, d31} , [r2], r4 @5
317    vst1.32       {d2, d3} , [r2], r4   @6
318    vst1.32       {d4, d5} , [r2], r4   @7
319    b             end_func
320
321do_vert:
322    @DO VERTICAL PREDICTION
323    str           r8 , [r7]             @MIN SAD
324    mov           r8, #2                @ mode 2 = VERT
325    str           r8 , [r6]             @ MODE
326    vmov          q15, q5               @ overwrite the DC prediction with the top row
327    vmov          q14, q5               @ so the shared store sequence below can be reused
328
329do_dc_vert:
    @ Shared store path: q14 is written to rows 0..3 and q15 to rows 4..7.
330    vst1.32       {d28, d29} , [r2], r4 @0
331    vst1.32       {d28, d29} , [r2], r4 @1
332    vst1.32       {d28, d29} , [r2], r4 @2
333    vst1.32       {d28, d29} , [r2], r4 @3
334    vst1.32       {d30, d31} , [r2], r4 @4
335    vst1.32       {d30, d31} , [r2], r4 @5
336    vst1.32       {d30, d31} , [r2], r4 @6
337    vst1.32       {d30, d31} , [r2], r4 @7
338
339
340end_func:
341    vpop          {d8-d15}              @ restore the NEON registers saved on entry
342    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack; loading pc returns to the caller
343
344
345
346