1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include <stdlib.h>
13#include "loopfilter.h"
14#include "onyxc_int.h"
15
16typedef unsigned char uc;
17
18static signed char vp8_signed_char_clamp(int t)
19{
20    t = (t < -128 ? -128 : t);
21    t = (t > 127 ? 127 : t);
22    return (signed char) t;
23}
24
25
26/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
27static signed char vp8_filter_mask(uc limit, uc blimit,
28                            uc p3, uc p2, uc p1, uc p0,
29                            uc q0, uc q1, uc q2, uc q3)
30{
31    signed char mask = 0;
32    mask |= (abs(p3 - p2) > limit);
33    mask |= (abs(p2 - p1) > limit);
34    mask |= (abs(p1 - p0) > limit);
35    mask |= (abs(q1 - q0) > limit);
36    mask |= (abs(q2 - q1) > limit);
37    mask |= (abs(q3 - q2) > limit);
38    mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > blimit);
39    return mask - 1;
40}
41
42/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
43static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
44{
45    signed char hev = 0;
46    hev  |= (abs(p1 - p0) > thresh) * -1;
47    hev  |= (abs(q1 - q0) > thresh) * -1;
48    return hev;
49}
50
51static void vp8_filter(signed char mask, uc hev, uc *op1,
52        uc *op0, uc *oq0, uc *oq1)
53
54{
55    signed char ps0, qs0;
56    signed char ps1, qs1;
57    signed char filter_value, Filter1, Filter2;
58    signed char u;
59
60    ps1 = (signed char) * op1 ^ 0x80;
61    ps0 = (signed char) * op0 ^ 0x80;
62    qs0 = (signed char) * oq0 ^ 0x80;
63    qs1 = (signed char) * oq1 ^ 0x80;
64
65    /* add outer taps if we have high edge variance */
66    filter_value = vp8_signed_char_clamp(ps1 - qs1);
67    filter_value &= hev;
68
69    /* inner taps */
70    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
71    filter_value &= mask;
72
73    /* save bottom 3 bits so that we round one side +4 and the other +3
74     * if it equals 4 we'll set to adjust by -1 to account for the fact
75     * we'd round 3 the other way
76     */
77    Filter1 = vp8_signed_char_clamp(filter_value + 4);
78    Filter2 = vp8_signed_char_clamp(filter_value + 3);
79    Filter1 >>= 3;
80    Filter2 >>= 3;
81    u = vp8_signed_char_clamp(qs0 - Filter1);
82    *oq0 = u ^ 0x80;
83    u = vp8_signed_char_clamp(ps0 + Filter2);
84    *op0 = u ^ 0x80;
85    filter_value = Filter1;
86
87    /* outer tap adjustments */
88    filter_value += 1;
89    filter_value >>= 1;
90    filter_value &= ~hev;
91
92    u = vp8_signed_char_clamp(qs1 - filter_value);
93    *oq1 = u ^ 0x80;
94    u = vp8_signed_char_clamp(ps1 + filter_value);
95    *op1 = u ^ 0x80;
96
97}
98void vp8_loop_filter_horizontal_edge_c
99(
100    unsigned char *s,
101    int p, /* pitch */
102    const unsigned char *blimit,
103    const unsigned char *limit,
104    const unsigned char *thresh,
105    int count
106)
107{
108    int  hev = 0; /* high edge variance */
109    signed char mask = 0;
110    int i = 0;
111
112    /* loop filter designed to work using chars so that we can make maximum use
113     * of 8 bit simd instructions.
114     */
115    do
116    {
117        mask = vp8_filter_mask(limit[0], blimit[0],
118                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
119                               s[0*p], s[1*p], s[2*p], s[3*p]);
120
121        hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
122
123        vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
124
125        ++s;
126    }
127    while (++i < count * 8);
128}
129
130void vp8_loop_filter_vertical_edge_c
131(
132    unsigned char *s,
133    int p,
134    const unsigned char *blimit,
135    const unsigned char *limit,
136    const unsigned char *thresh,
137    int count
138)
139{
140    int  hev = 0; /* high edge variance */
141    signed char mask = 0;
142    int i = 0;
143
144    /* loop filter designed to work using chars so that we can make maximum use
145     * of 8 bit simd instructions.
146     */
147    do
148    {
149        mask = vp8_filter_mask(limit[0], blimit[0],
150                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
151
152        hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
153
154        vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
155
156        s += p;
157    }
158    while (++i < count * 8);
159}
160
161static void vp8_mbfilter(signed char mask, uc hev,
162                           uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
163{
164    signed char s, u;
165    signed char filter_value, Filter1, Filter2;
166    signed char ps2 = (signed char) * op2 ^ 0x80;
167    signed char ps1 = (signed char) * op1 ^ 0x80;
168    signed char ps0 = (signed char) * op0 ^ 0x80;
169    signed char qs0 = (signed char) * oq0 ^ 0x80;
170    signed char qs1 = (signed char) * oq1 ^ 0x80;
171    signed char qs2 = (signed char) * oq2 ^ 0x80;
172
173    /* add outer taps if we have high edge variance */
174    filter_value = vp8_signed_char_clamp(ps1 - qs1);
175    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
176    filter_value &= mask;
177
178    Filter2 = filter_value;
179    Filter2 &= hev;
180
181    /* save bottom 3 bits so that we round one side +4 and the other +3 */
182    Filter1 = vp8_signed_char_clamp(Filter2 + 4);
183    Filter2 = vp8_signed_char_clamp(Filter2 + 3);
184    Filter1 >>= 3;
185    Filter2 >>= 3;
186    qs0 = vp8_signed_char_clamp(qs0 - Filter1);
187    ps0 = vp8_signed_char_clamp(ps0 + Filter2);
188
189
190    /* only apply wider filter if not high edge variance */
191    filter_value &= ~hev;
192    Filter2 = filter_value;
193
194    /* roughly 3/7th difference across boundary */
195    u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
196    s = vp8_signed_char_clamp(qs0 - u);
197    *oq0 = s ^ 0x80;
198    s = vp8_signed_char_clamp(ps0 + u);
199    *op0 = s ^ 0x80;
200
201    /* roughly 2/7th difference across boundary */
202    u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
203    s = vp8_signed_char_clamp(qs1 - u);
204    *oq1 = s ^ 0x80;
205    s = vp8_signed_char_clamp(ps1 + u);
206    *op1 = s ^ 0x80;
207
208    /* roughly 1/7th difference across boundary */
209    u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
210    s = vp8_signed_char_clamp(qs2 - u);
211    *oq2 = s ^ 0x80;
212    s = vp8_signed_char_clamp(ps2 + u);
213    *op2 = s ^ 0x80;
214}
215
216void vp8_mbloop_filter_horizontal_edge_c
217(
218    unsigned char *s,
219    int p,
220    const unsigned char *blimit,
221    const unsigned char *limit,
222    const unsigned char *thresh,
223    int count
224)
225{
226    signed char hev = 0; /* high edge variance */
227    signed char mask = 0;
228    int i = 0;
229
230    /* loop filter designed to work using chars so that we can make maximum use
231     * of 8 bit simd instructions.
232     */
233    do
234    {
235
236        mask = vp8_filter_mask(limit[0], blimit[0],
237                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
238                               s[0*p], s[1*p], s[2*p], s[3*p]);
239
240        hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
241
242        vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
243
244        ++s;
245    }
246    while (++i < count * 8);
247
248}
249
250
251void vp8_mbloop_filter_vertical_edge_c
252(
253    unsigned char *s,
254    int p,
255    const unsigned char *blimit,
256    const unsigned char *limit,
257    const unsigned char *thresh,
258    int count
259)
260{
261    signed char hev = 0; /* high edge variance */
262    signed char mask = 0;
263    int i = 0;
264
265    do
266    {
267
268        mask = vp8_filter_mask(limit[0], blimit[0],
269                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
270
271        hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
272
273        vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
274
275        s += p;
276    }
277    while (++i < count * 8);
278
279}
280
281/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
282static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
283{
284/* Why does this cause problems for win32?
285 * error C2143: syntax error : missing ';' before 'type'
286 *  (void) limit;
287 */
288    signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= blimit) * -1;
289    return mask;
290}
291
292static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
293{
294    signed char filter_value, Filter1, Filter2;
295    signed char p1 = (signed char) * op1 ^ 0x80;
296    signed char p0 = (signed char) * op0 ^ 0x80;
297    signed char q0 = (signed char) * oq0 ^ 0x80;
298    signed char q1 = (signed char) * oq1 ^ 0x80;
299    signed char u;
300
301    filter_value = vp8_signed_char_clamp(p1 - q1);
302    filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
303    filter_value &= mask;
304
305    /* save bottom 3 bits so that we round one side +4 and the other +3 */
306    Filter1 = vp8_signed_char_clamp(filter_value + 4);
307    Filter1 >>= 3;
308    u = vp8_signed_char_clamp(q0 - Filter1);
309    *oq0  = u ^ 0x80;
310
311    Filter2 = vp8_signed_char_clamp(filter_value + 3);
312    Filter2 >>= 3;
313    u = vp8_signed_char_clamp(p0 + Filter2);
314    *op0 = u ^ 0x80;
315}
316
317void vp8_loop_filter_simple_horizontal_edge_c
318(
319    unsigned char *s,
320    int p,
321    const unsigned char *blimit
322)
323{
324    signed char mask = 0;
325    int i = 0;
326
327    do
328    {
329        mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
330        vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
331        ++s;
332    }
333    while (++i < 16);
334}
335
336void vp8_loop_filter_simple_vertical_edge_c
337(
338    unsigned char *s,
339    int p,
340    const unsigned char *blimit
341)
342{
343    signed char mask = 0;
344    int i = 0;
345
346    do
347    {
348        mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
349        vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
350        s += p;
351    }
352    while (++i < 16);
353
354}
355
356/* Horizontal MB filtering */
357void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
358                           unsigned char *v_ptr, int y_stride, int uv_stride,
359                           loop_filter_info *lfi)
360{
361    vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
362
363    if (u_ptr)
364        vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
365
366    if (v_ptr)
367        vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
368}
369
370/* Vertical MB Filtering */
371void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
372                           unsigned char *v_ptr, int y_stride, int uv_stride,
373                           loop_filter_info *lfi)
374{
375    vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
376
377    if (u_ptr)
378        vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
379
380    if (v_ptr)
381        vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
382}
383
384/* Horizontal B Filtering */
385void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
386                          unsigned char *v_ptr, int y_stride, int uv_stride,
387                          loop_filter_info *lfi)
388{
389    vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
390    vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
391    vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
392
393    if (u_ptr)
394        vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
395
396    if (v_ptr)
397        vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
398}
399
400void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
401                           const unsigned char *blimit)
402{
403    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
404    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
405    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
406}
407
408/* Vertical B Filtering */
409void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
410                          unsigned char *v_ptr, int y_stride, int uv_stride,
411                          loop_filter_info *lfi)
412{
413    vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
414    vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
415    vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
416
417    if (u_ptr)
418        vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
419
420    if (v_ptr)
421        vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
422}
423
424void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
425                           const unsigned char *blimit)
426{
427    vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
428    vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
429    vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
430}
431