1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20
21@******************************************************************************
22@*
23@* @brief
@*  This file contains definitions of routines for combing artifact check (CAC)
25@*
26@* @author
27@*  Ittiam
28@*
29@* @par List of Functions:
30@*  - ideint_cac_8x8_a9()
31@*
32@* @remarks
33@*  None
34@*
35@*******************************************************************************
36
37
38@******************************************************************************
39@*
40@*  @brief Calculates Combing Artifact
41@*
42@*  @par   Description
@*   This function calculates the combing artifact check (CAC) for the given two fields
44@*
45@* @param[in] pu1_top
46@*  UWORD8 pointer to top field
47@*
48@* @param[in] pu1_bot
49@*  UWORD8 pointer to bottom field
50@*
51@* @param[in] top_strd
52@*  Top field stride
53@*
54@* @param[in] bot_strd
55@*  Bottom field stride
56@*
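@* @returns
@*  1 if the biased alt value is less than the adj value for either
@*  4-column half of the block (combing artifact detected), 0 otherwise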
59@*
60@* @remarks
61@*
62@******************************************************************************
63
64    .global ideint_cac_8x8_a9
65
ideint_cac_8x8_a9:

    @ In:    r0 = pu1_top, r1 = pu1_bot, r2 = top_strd, r3 = bot_strd
    @ Out:   r0 = 1 if (biased alt < adj) for either 4-column half, else 0
    @ Uses:  r0, r1, q0-q3, q8-q10, q12-q15, flags
    @
    @ Leaf routine: no calls are made and only caller-saved registers are
    @ touched, so nothing is pushed on the stack. In particular d8-d15,
    @ which the AAPCS requires a callee to preserve, are left untouched.
    @
    @ Four rows of 8 bytes are read from each field. Every result is kept
    @ twice, once for columns 0-3 and once for columns 4-7.

    @ Load first row of top
    vld1.u8     d28,    [r0],   r2

    @ Load first row of bottom
    vld1.u8     d29,    [r1],   r3

    @ Load second row of top
    vld1.u8     d30,    [r0],   r2

    @ Load second row of bottom
    vld1.u8     d31,    [r1],   r3


    @ Calculate row based adj and alt values
    @ Get row sums: two pairwise widening adds turn each group of four
    @ bytes into one 32 bit sum
    vpaddl.u8   q0,     q14

    vpaddl.u8   q1,     q15

    vpaddl.u16  q0,     q0

    vpaddl.u16  q1,     q1

    @ q0 = {top0 cols0-3, top0 cols4-7, bot0 cols0-3, bot0 cols4-7}
    @ q1 = same layout for top1 / bot1
    @ Pack q0 and q1 into a single register. A sum of four bytes is at
    @ most 4 * 255 = 1020, so it fits in 16 bits: q0 goes to the low
    @ halfword and q1 to the high halfword of every 32 bit lane.

    vshl.u32    q8,     q1,     #16
    vorr.u32    q8,     q0,     q8
    @ q8 now contains 8 sums (d16 = top rows, d17 = bottom rows)

    @ Load third row of top
    vld1.u8     d24,    [r0],   r2

    @ Load third row of bottom
    vld1.u8     d25,    [r1],   r3

    @ Load fourth row of top
    vld1.u8     d26,    [r0],   r2

    @ Load fourth row of bottom
    vld1.u8     d27,    [r1],   r3

    @ Get row sums
    vpaddl.u8   q2,     q12

    vpaddl.u8   q3,     q13

    vpaddl.u16  q2,     q2

    vpaddl.u16  q3,     q3
    @ q2 / q3 hold the 32 bit sums of the third / fourth rows, laid out
    @ like q0 / q1. Pack them the same way.

    vshl.u32    q9,     q3,     #16
    vorr.u32    q9,     q2,     q9
    @ q9 now contains 8 sums (d18 = top rows, d19 = bottom rows)

    @ Compute absolute diff between top and bottom row sums
    vabd.u16    d16,    d16,    d17
    vabd.u16    d17,    d18,    d19

    @ RSUM_CSUM_THRESH
    vmov.u16    q9,     #20

    @ Eliminate values smaller than RSUM_CSUM_THRESH
    vcge.u16    q10,    q8,     q9
    vand.u16    q10,    q8,     q10
    @ q10 now contains 8 absolute diff of sums above the threshold


    @ Compute adj
    vadd.u16    d20,    d20,    d21

    @ d20 has four 16 bit adj values, two for each sub-block

    @ Compute alt: absolute diff between sums of consecutive rows of the
    @ same field (first vs second row, third vs fourth row)
    vabd.u32    q0,     q0,     q1
    vabd.u32    q2,     q2,     q3

    vadd.u32    q0,     q0,     q2
    vadd.u32    d21,    d0,     d1
    @ d21 has two 32 bit alt values, one for each sub-block


    @ Calculate column based adj and alt values

    @ Per-column rounded average of the four rows of each field:
    @ d0 = top field, d1 = bottom field
    vrhadd.u8   q0,     q14,    q15
    vrhadd.u8   q1,     q12,    q13
    vrhadd.u8   q0,     q0,     q1

    vabd.u8     d0,     d0,     d1

    @ RSUM_CSUM_THRESH >> 2
    @ d2 is dead at this point and is rewritten further below, so it can
    @ hold the constant; a callee-saved register (d8-d15) must not be
    @ used here because this routine never saves or restores them.
    vmov.u8     d2,     #5

    @ Eliminate values smaller than RSUM_CSUM_THRESH >> 2
    vcge.u8     d1,     d0,     d2
    vand.u8     d0,     d0,     d1
    @ d0 now contains 8 absolute diff of averages above the threshold


    @ Sum neighbouring columns and multiply by 4, since the averages
    @ above stand in for sums of four rows
    vpaddl.u8   d0,     d0
    vshl.u16    d0,     d0,     #2

    @ Add row based adj
    vadd.u16    d20,    d0,     d20

    vpaddl.u16  d20,    d20
    @ d20 now contains 2 adj values (32 bit, one per sub-block)


    @ d0 = per-column rounded average of first and third rows of both fields
    vrhadd.u8   d0,     d28,    d29
    vrhadd.u8   d2,     d24,    d25
    vrhadd.u8   d0,     d0,     d2

    @ d1 = per-column rounded average of second and fourth rows of both fields
    vrhadd.u8   d1,     d30,    d31
    vrhadd.u8   d3,     d26,    d27
    vrhadd.u8   d1,     d1,     d3

    vabd.u8     d0,     d0,     d1
    vpaddl.u8   d0,     d0

    vshl.u16    d0,     d0,     #2
    vpaddl.u16  d0,     d0
    vadd.u32    d21,    d0,     d21


    @ d21 now contains 2 alt values

    @ SAD_BIAS_MULT_SHIFT: alt += alt >> 3
    vshr.u32    d0,     d21,    #3
    vadd.u32    d21,    d21,    d0

    @ SAD_BIAS_ADDITIVE >> 1
    vmov.u32    d0,     #4
    vadd.u32    d21,    d21,    d0

    @ Lane mask is all ones where alt < adj. Adding the two lanes leaves a
    @ non-zero low word when at least one sub-block satisfied the test.
    vclt.u32    d0,     d21,    d20
    vpaddl.u32  d0,     d0

    @ Normalise the result to 0 / 1
    vmov.u32    r0,     d0[0]
    cmp         r0,     #0
    movne       r0,     #1
    bx          lr
214