@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@******************************************************************************
@*
@* @brief
@*  This file contains definitions of routines for the spatial filter
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*  - ideint_spatial_filter_a9()
@*
@* @remarks
@*  None
@*
@*******************************************************************************


@******************************************************************************
@*
@*  @brief Performs edge adaptive spatial filtering
@*
@*  @par   Description
@*   This function performs edge adaptive spatial filtering. It reads five
@*   consecutive source rows and writes four rows of eight pixels. The eight
@*   columns are treated as two independent halves of four columns each.
@*
@*   For each half:
@*     1. Sums of absolute differences between every source row (row_1) and
@*        the row below it (row_2) are accumulated over four row pairs for
@*        three directions:
@*          adiff[0] : row_1[i]     vs row_2[i]      (vertical)
@*          adiff[1] : row_1[i - 1] vs row_2[i + 1]  (135 degree diagonal)
@*          adiff[2] : row_1[i + 1] vs row_2[i - 1]  (45 degree diagonal)
@*     2. adiff[0] is multiplied by EDGE_BIAS_0 (5); adiff[1] and adiff[2]
@*        are multiplied by EDGE_BIAS_1 (7), which favours the vertical
@*        direction.
@*     3. shift = +1 if adiff[2] <= adiff[1] and adiff[2] <= adiff[0]
@*        shift = -1 if adiff[2] >  adiff[1] and adiff[1] <= adiff[0]
@*        shift =  0 otherwise
@*     4. out[row][i] = (src[row][i + shift] + src[row + 1][i - shift] + 1) >> 1
@*        for row = 0..3
@*
@*   The source is read at columns -1 to 8 on each of the five rows, so those
@*   bytes must be readable by the caller's buffer.
@*
@* @param[in] pu1_src   (r0)
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_out  (r1)
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd  (r2)
@*  source stride
@*
@* @param[in] out_strd  (r3)
@*  destination stride
@*
@* @returns
@*  None
@*
@* @remarks
@*  r4-r10 are saved on entry and restored on exit.
@*  r0 and r1 are advanced while loading / storing.
@*  NEON registers d0-d5, q8-q10 (d16-d21), d30 and d31 and the condition
@*  flags are modified. d8-d15 are not touched.
@*
@******************************************************************************

    .text
    .p2align    2
    .global     ideint_spatial_filter_a9
    .type       ideint_spatial_filter_a9, %function

ideint_spatial_filter_a9:

    @ Eight registers (32 bytes) are pushed. lr is included so that the
    @ function can return by popping it straight into pc.
    stmfd       sp!,    {r4-r10, lr}

    @ Clear the SAD accumulators, eight u16 lanes each (one lane per column)
    @ q8 = adiff[0], q9 = adiff[1], q10 = adiff[2]
    vmov.u16    q8,     #0
    vmov.u16    q9,     #0
    vmov.u16    q10,    #0

    @ Backup r0: the edge detection loop advances r0, the interpolation
    @ stage needs the original pu1_src again
    mov         r10,    r0

    @ Load from &pu1_row_1[0] and advance r0 to the next row
    sub         r5,     r0,     #1
    vld1.8      d0,     [r0],   r2

    @ Load from &pu1_row_1[-1]
    vld1.8      d1,     [r5]
    add         r5,     r5,     #2

    @ Load from &pu1_row_1[1]
    vld1.8      d2,     [r5]

    @ Number of row pairs to compare
    mov         r4,     #4

    @ EDGE_BIAS_0 in both 32-bit lanes of d30
    vmov.u32    d30,    #5

    @ EDGE_BIAS_1 in both 32-bit lanes of d31
    vmov.u32    d31,    #7

    @ On entry to every iteration d0/d1/d2 hold pu1_row_1 at column
    @ offsets 0/-1/+1 and r0 points to pu1_row_2
detect_edge:
    @ Load from &pu1_row_2[0] and advance r0 to the next row
    sub         r5,     r0,     #1
    vld1.8      d3,     [r0],   r2

    @ Load from &pu1_row_2[-1]
    vld1.8      d4,     [r5]
    add         r5,     r5,     #2

    @ Load from &pu1_row_2[1]
    vld1.8      d5,     [r5]

    @ Accumulate absolute differences
    @ adiff[0] += |pu1_row_1[i] - pu1_row_2[i]|
    vabal.u8    q8,     d0,     d3

    @ adiff[1] += |pu1_row_1[i - 1] - pu1_row_2[i + 1]|
    vabal.u8    q9,     d1,     d5

    @ adiff[2] += |pu1_row_1[i + 1] - pu1_row_2[i - 1]|
    vabal.u8    q10,    d4,     d2

    @ The current pu1_row_2 becomes pu1_row_1 of the next iteration
    vmov        d0,     d3
    vmov        d1,     d4
    vmov        d2,     d5

    subs        r4,     r4,     #1
    bgt         detect_edge

    @ Calculate sum of absolute differences for each edge direction.
    @ The pairwise add leaves four u16 sums of two columns each ...
    vpadd.u16   d16,    d16,    d17
    vpadd.u16   d18,    d18,    d19
    vpadd.u16   d20,    d20,    d21

    @ ... and the widening pairwise add leaves two u32 sums per register:
    @ lane 0 = columns 0-3 (first half), lane 1 = columns 4-7 (second half)
    vpaddl.u16  d16,    d16
    vpaddl.u16  d18,    d18
    vpaddl.u16  d20,    d20

    @ adiff[0] *= EDGE_BIAS_0;
    vmul.u32    d16,    d16,    d30

    @ adiff[1] *= EDGE_BIAS_1;
    vmul.u32    d18,    d18,    d31

    @ adiff[2] *= EDGE_BIAS_1;
    vmul.u32    d20,    d20,    d31

    @ The biased sums are non-negative and at most 4 * 4 * 255 * 7, so the
    @ signed condition codes used below compare them correctly.

    @ Compute shift for first half of the block (result in r8)
compute_shift_1:
    @ Move the differences of the first half to ARM registers
    @ r5 = adiff[0], r6 = adiff[1], r7 = adiff[2]
    vmov.u32    r5,     d16[0]
    vmov.u32    r6,     d18[0]
    vmov.u32    r7,     d20[0]

    @ Default shift is 0
    mov         r8,     #0

    @ adiff[2] <= adiff[1] ?
    cmp         r7,     r6
    bgt         dir_45_gt_135_1

    @ adiff[2] <= adiff[0] : shift = 1
    cmp         r7,     r5
    movle       r8,     #1

    b           compute_shift_2
dir_45_gt_135_1:

    @ adiff[1] <= adiff[0] : shift = -1
    cmp         r6,     r5
    @ mvn of 0 writes 0xFFFFFFFF, i.e. -1
    mvnle       r8,     #0


compute_shift_2:
    @ Compute shift for second half of the block (result in r9)
    @ r5 = adiff[0], r6 = adiff[1], r7 = adiff[2]
    vmov.u32    r5,     d16[1]
    vmov.u32    r6,     d18[1]
    vmov.u32    r7,     d20[1]

    @ Default shift is 0
    mov         r9,     #0

    @ adiff[2] <= adiff[1] ?
    cmp         r7,     r6
    bgt         dir_45_gt_135_2

    @ adiff[2] <= adiff[0] : shift = 1
    cmp         r7,     r5
    movle       r9,     #1

    b           interpolate
dir_45_gt_135_2:

    @ adiff[1] <= adiff[0] : shift = -1
    cmp         r6,     r5

    @ mvn of 0 writes 0xFFFFFFFF, i.e. -1
    mvnle       r9,     #0

interpolate:
    @ First half:  r4 = pu1_src + shift_1
    @              r5 = pu1_src + src_strd - shift_1
    add         r4,     r10,    r8
    add         r5,     r10,    r2
    sub         r5,     r5,     r8

    @ Second half starts four columns further
    @              r6 = pu1_src + 4 + shift_2
    @              r7 = pu1_src + 4 + src_strd - shift_2
    add         r10,    r10,    #4
    add         r6,     r10,    r9
    add         r7,     r10,    r2
    sub         r7,     r7,     r9

    @ Number of output rows (r8 is free again, shift_1 has been consumed)
    mov         r8,     #4

filter_loop:
    @ Lane 0 of d0/d2: four pixels of the first half from the upper/lower row
    vld1.u32    d0[0],  [r4],   r2
    vld1.u32    d2[0],  [r5],   r2

    @ Lane 1 of d0/d2: four pixels of the second half from the upper/lower row
    vld1.u32    d0[1],  [r6],   r2
    vld1.u32    d2[1],  [r7],   r2

    @ Rounding average of the two rows, (a + b + 1) >> 1, for all eight pixels
    vrhadd.u8   d4,     d0,     d2
    vst1.u32    d4,     [r1],   r3

    subs        r8,     r8,     #1
    bgt         filter_loop

    @ Restore r4-r10 and return (saved lr is loaded into pc)
    ldmfd       sp!,    {r4-r10, pc}

    .size       ideint_spatial_filter_a9, . - ideint_spatial_filter_a9
224