sad_halfpel.cpp revision 29a84457aed4c45bc900998b5e11c03023264208
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/* contains
19int AVCHalfPel1_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh)
20int AVCHalfPel2_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width)
21int AVCHalfPel1_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh)
22int AVCHalfPel2_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width)
23
24int AVCSAD_MB_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
25int AVCSAD_MB_HP_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
26int AVCSAD_MB_HP_HTFM(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
27int AVCSAD_Blk_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
28*/
29
30#include "avcenc_lib.h"
31#include "sad_halfpel_inline.h"
32
33#ifdef _SAD_STAT
34uint32 num_sad_HP_MB = 0;
35uint32 num_sad_HP_Blk = 0;
36uint32 num_sad_HP_MB_call = 0;
37uint32 num_sad_HP_Blk_call = 0;
38#define NUM_SAD_HP_MB_CALL()    num_sad_HP_MB_call++
39#define NUM_SAD_HP_MB()         num_sad_HP_MB++
40#define NUM_SAD_HP_BLK_CALL()   num_sad_HP_Blk_call++
41#define NUM_SAD_HP_BLK()        num_sad_HP_Blk++
42#else
43#define NUM_SAD_HP_MB_CALL()
44#define NUM_SAD_HP_MB()
45#define NUM_SAD_HP_BLK_CALL()
46#define NUM_SAD_HP_BLK()
47#endif
48
49
50
51/*===============================================================
52    Function:   SAD_MB_HalfPel
53    Date:       09/17/2000
54    Purpose:    Compute the SAD on the half-pel resolution
55    Input/Output:   hmem is assumed to be a pointer to the starting
56                point of the search in the 33x33 matrix search region
57    Changes:
58    11/7/00:    implemented MMX
59  ===============================================================*/
60/*==================================================================
61    Function:   AVCSAD_MB_HalfPel_C
62    Date:       04/30/2001
63    Purpose:    Compute SAD 16x16 between blk and ref in halfpel
64                resolution,
65    Changes:
66  ==================================================================*/
67/* One component is half-pel */
68int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
69{
70    (void)(extra_info);
71
72    int i, j;
73    int sad = 0;
74    uint8 *kk, *p1, *p2, *p3, *p4;
75//  int sumref=0;
76    int temp;
77    int rx = dmin_rx & 0xFFFF;
78
79    NUM_SAD_HP_MB_CALL();
80
81    p1 = ref;
82    p2 = ref + 1;
83    p3 = ref + rx;
84    p4 = ref + rx + 1;
85    kk  = blk;
86
87    for (i = 0; i < 16; i++)
88    {
89        for (j = 0; j < 16; j++)
90        {
91
92            temp = ((p1[j] + p2[j] + p3[j] + p4[j] + 2) >> 2) - *kk++;
93            sad += AVC_ABS(temp);
94        }
95
96        NUM_SAD_HP_MB();
97
98        if (sad > (int)((uint32)dmin_rx >> 16))
99            return sad;
100
101        p1 += rx;
102        p3 += rx;
103        p2 += rx;
104        p4 += rx;
105    }
106    return sad;
107}
108
109int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
110{
111    (void)(extra_info);
112
113    int i, j;
114    int sad = 0;
115    uint8 *kk, *p1, *p2;
116//  int sumref=0;
117    int temp;
118    int rx = dmin_rx & 0xFFFF;
119
120    NUM_SAD_HP_MB_CALL();
121
122    p1 = ref;
123    p2 = ref + rx; /* either left/right or top/bottom pixel */
124    kk  = blk;
125
126    for (i = 0; i < 16; i++)
127    {
128        for (j = 0; j < 16; j++)
129        {
130
131            temp = ((p1[j] + p2[j] + 1) >> 1) - *kk++;
132            sad += AVC_ABS(temp);
133        }
134
135        NUM_SAD_HP_MB();
136
137        if (sad > (int)((uint32)dmin_rx >> 16))
138            return sad;
139        p1 += rx;
140        p2 += rx;
141    }
142    return sad;
143}
144
145int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
146{
147    (void)(extra_info);
148
149    int i, j;
150    int sad = 0;
151    uint8 *kk, *p1;
152    int temp;
153    int rx = dmin_rx & 0xFFFF;
154
155    NUM_SAD_HP_MB_CALL();
156
157    p1 = ref;
158    kk  = blk;
159
160    for (i = 0; i < 16; i++)
161    {
162        for (j = 0; j < 16; j++)
163        {
164
165            temp = ((p1[j] + p1[j+1] + 1) >> 1) - *kk++;
166            sad += AVC_ABS(temp);
167        }
168
169        NUM_SAD_HP_MB();
170
171        if (sad > (int)((uint32)dmin_rx >> 16))
172            return sad;
173        p1 += rx;
174    }
175    return sad;
176}
177
178#ifdef HTFM  /* HTFM with uniform subsampling implementation,  2/28/01 */
179
180//Checheck here
181int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
182{
183    int i, j;
184    int sad = 0;
185    uint8 *p1, *p2;
186    int rx = dmin_rx & 0xFFFF;
187    int refwx4 = rx << 2;
188    int saddata[16];      /* used when collecting flag (global) is on */
189    int difmad, tmp, tmp2;
190    int madstar;
191    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
192    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
193    UInt *countbreak = &(htfm_stat->countbreak);
194    int *offsetRef = htfm_stat->offsetRef;
195    uint32 cur_word;
196
197    madstar = (uint32)dmin_rx >> 20;
198
199    NUM_SAD_HP_MB_CALL();
200
201    blk -= 4;
202
203    for (i = 0; i < 16; i++) /* 16 stages */
204    {
205        p1 = ref + offsetRef[i];
206        p2 = p1 + rx;
207
208        j = 4;/* 4 lines */
209        do
210        {
211            cur_word = *((uint32*)(blk += 4));
212            tmp = p1[12] + p2[12];
213            tmp2 = p1[13] + p2[13];
214            tmp += tmp2;
215            tmp2 = (cur_word >> 24) & 0xFF;
216            tmp += 2;
217            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
218            tmp = p1[8] + p2[8];
219            tmp2 = p1[9] + p2[9];
220            tmp += tmp2;
221            tmp2 = (cur_word >> 16) & 0xFF;
222            tmp += 2;
223            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
224            tmp = p1[4] + p2[4];
225            tmp2 = p1[5] + p2[5];
226            tmp += tmp2;
227            tmp2 = (cur_word >> 8) & 0xFF;
228            tmp += 2;
229            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
230            tmp2 = p1[1] + p2[1];
231            tmp = p1[0] + p2[0];
232            p1 += refwx4;
233            p2 += refwx4;
234            tmp += tmp2;
235            tmp2 = (cur_word & 0xFF);
236            tmp += 2;
237            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
238        }
239        while (--j);
240
241        NUM_SAD_HP_MB();
242
243        saddata[i] = sad;
244
245        if (i > 0)
246        {
247            if (sad > ((uint32)dmin_rx >> 16))
248            {
249                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
250                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
251                (*countbreak)++;
252                return sad;
253            }
254        }
255    }
256    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
257    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
258    (*countbreak)++;
259
260    return sad;
261}
262
263int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
264{
265    int i, j;
266    int sad = 0;
267    uint8 *p1, *p2;
268    int rx = dmin_rx & 0xFFFF;
269    int refwx4 = rx << 2;
270    int saddata[16];      /* used when collecting flag (global) is on */
271    int difmad, tmp, tmp2;
272    int madstar;
273    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
274    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
275    UInt *countbreak = &(htfm_stat->countbreak);
276    int *offsetRef = htfm_stat->offsetRef;
277    uint32 cur_word;
278
279    madstar = (uint32)dmin_rx >> 20;
280
281    NUM_SAD_HP_MB_CALL();
282
283    blk -= 4;
284
285    for (i = 0; i < 16; i++) /* 16 stages */
286    {
287        p1 = ref + offsetRef[i];
288        p2 = p1 + rx;
289        j = 4;
290        do
291        {
292            cur_word = *((uint32*)(blk += 4));
293            tmp = p1[12];
294            tmp2 = p2[12];
295            tmp++;
296            tmp2 += tmp;
297            tmp = (cur_word >> 24) & 0xFF;
298            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
299            tmp = p1[8];
300            tmp2 = p2[8];
301            tmp++;
302            tmp2 += tmp;
303            tmp = (cur_word >> 16) & 0xFF;
304            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
305            tmp = p1[4];
306            tmp2 = p2[4];
307            tmp++;
308            tmp2 += tmp;
309            tmp = (cur_word >> 8) & 0xFF;
310            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
311            tmp = p1[0];
312            p1 += refwx4;
313            tmp2 = p2[0];
314            p2 += refwx4;
315            tmp++;
316            tmp2 += tmp;
317            tmp = (cur_word & 0xFF);
318            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
319        }
320        while (--j);
321
322        NUM_SAD_HP_MB();
323
324        saddata[i] = sad;
325
326        if (i > 0)
327        {
328            if (sad > ((uint32)dmin_rx >> 16))
329            {
330                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
331                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
332                (*countbreak)++;
333                return sad;
334            }
335        }
336    }
337    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
338    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
339    (*countbreak)++;
340
341    return sad;
342}
343
344int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
345{
346    int i, j;
347    int sad = 0;
348    uint8 *p1;
349    int rx = dmin_rx & 0xFFFF;
350    int refwx4 = rx << 2;
351    int saddata[16];      /* used when collecting flag (global) is on */
352    int difmad, tmp, tmp2;
353    int madstar;
354    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
355    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
356    UInt *countbreak = &(htfm_stat->countbreak);
357    int *offsetRef = htfm_stat->offsetRef;
358    uint32 cur_word;
359
360    madstar = (uint32)dmin_rx >> 20;
361
362    NUM_SAD_HP_MB_CALL();
363
364    blk -= 4;
365
366    for (i = 0; i < 16; i++) /* 16 stages */
367    {
368        p1 = ref + offsetRef[i];
369
370        j = 4; /* 4 lines */
371        do
372        {
373            cur_word = *((uint32*)(blk += 4));
374            tmp = p1[12];
375            tmp2 = p1[13];
376            tmp++;
377            tmp2 += tmp;
378            tmp = (cur_word >> 24) & 0xFF;
379            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
380            tmp = p1[8];
381            tmp2 = p1[9];
382            tmp++;
383            tmp2 += tmp;
384            tmp = (cur_word >> 16) & 0xFF;
385            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
386            tmp = p1[4];
387            tmp2 = p1[5];
388            tmp++;
389            tmp2 += tmp;
390            tmp = (cur_word >> 8) & 0xFF;
391            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
392            tmp = p1[0];
393            tmp2 = p1[1];
394            p1 += refwx4;
395            tmp++;
396            tmp2 += tmp;
397            tmp = (cur_word & 0xFF);
398            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
399        }
400        while (--j);
401
402        NUM_SAD_HP_MB();
403
404        saddata[i] = sad;
405
406        if (i > 0)
407        {
408            if (sad > ((uint32)dmin_rx >> 16))
409            {
410                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
411                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
412                (*countbreak)++;
413                return sad;
414            }
415        }
416    }
417    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
418    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
419    (*countbreak)++;
420
421    return sad;
422}
423
424int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
425{
426    int i, j;
427    int sad = 0, tmp, tmp2;
428    uint8 *p1, *p2;
429    int rx = dmin_rx & 0xFFFF;
430    int refwx4 = rx << 2;
431    int sadstar = 0, madstar;
432    int *nrmlz_th = (int*) extra_info;
433    int *offsetRef = nrmlz_th + 32;
434    uint32 cur_word;
435
436    madstar = (uint32)dmin_rx >> 20;
437
438    NUM_SAD_HP_MB_CALL();
439
440    blk -= 4;
441
442    for (i = 0; i < 16; i++) /* 16 stages */
443    {
444        p1 = ref + offsetRef[i];
445        p2 = p1 + rx;
446
447        j = 4; /* 4 lines */
448        do
449        {
450            cur_word = *((uint32*)(blk += 4));
451            tmp = p1[12] + p2[12];
452            tmp2 = p1[13] + p2[13];
453            tmp += tmp2;
454            tmp2 = (cur_word >> 24) & 0xFF;
455            tmp += 2;
456            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
457            tmp = p1[8] + p2[8];
458            tmp2 = p1[9] + p2[9];
459            tmp += tmp2;
460            tmp2 = (cur_word >> 16) & 0xFF;
461            tmp += 2;
462            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
463            tmp = p1[4] + p2[4];
464            tmp2 = p1[5] + p2[5];
465            tmp += tmp2;
466            tmp2 = (cur_word >> 8) & 0xFF;
467            tmp += 2;
468            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
469            tmp2 = p1[1] + p2[1];
470            tmp = p1[0] + p2[0];
471            p1 += refwx4;
472            p2 += refwx4;
473            tmp += tmp2;
474            tmp2 = (cur_word & 0xFF);
475            tmp += 2;
476            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
477        }
478        while (--j);
479
480        NUM_SAD_HP_MB();
481
482        sadstar += madstar;
483        if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16))
484        {
485            return 65536;
486        }
487    }
488
489    return sad;
490}
491
492int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
493{
494    int i, j;
495    int sad = 0, tmp, tmp2;
496    uint8 *p1, *p2;
497    int rx = dmin_rx & 0xFFFF;
498    int refwx4 = rx << 2;
499    int sadstar = 0, madstar;
500    int *nrmlz_th = (int*) extra_info;
501    int *offsetRef = nrmlz_th + 32;
502    uint32 cur_word;
503
504    madstar = (uint32)dmin_rx >> 20;
505
506    NUM_SAD_HP_MB_CALL();
507
508    blk -= 4;
509
510    for (i = 0; i < 16; i++) /* 16 stages */
511    {
512        p1 = ref + offsetRef[i];
513        p2 = p1 + rx;
514        j = 4;
515        do
516        {
517            cur_word = *((uint32*)(blk += 4));
518            tmp = p1[12];
519            tmp2 = p2[12];
520            tmp++;
521            tmp2 += tmp;
522            tmp = (cur_word >> 24) & 0xFF;
523            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
524            tmp = p1[8];
525            tmp2 = p2[8];
526            tmp++;
527            tmp2 += tmp;
528            tmp = (cur_word >> 16) & 0xFF;
529            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
530            tmp = p1[4];
531            tmp2 = p2[4];
532            tmp++;
533            tmp2 += tmp;
534            tmp = (cur_word >> 8) & 0xFF;
535            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
536            tmp = p1[0];
537            p1 += refwx4;
538            tmp2 = p2[0];
539            p2 += refwx4;
540            tmp++;
541            tmp2 += tmp;
542            tmp = (cur_word & 0xFF);
543            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
544        }
545        while (--j);
546
547        NUM_SAD_HP_MB();
548        sadstar += madstar;
549        if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16))
550        {
551            return 65536;
552        }
553    }
554
555    return sad;
556}
557
558int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
559{
560    int i, j;
561    int sad = 0, tmp, tmp2;
562    uint8 *p1;
563    int rx = dmin_rx & 0xFFFF;
564    int refwx4 = rx << 2;
565    int sadstar = 0, madstar;
566    int *nrmlz_th = (int*) extra_info;
567    int *offsetRef = nrmlz_th + 32;
568    uint32 cur_word;
569
570    madstar = (uint32)dmin_rx >> 20;
571
572    NUM_SAD_HP_MB_CALL();
573
574    blk -= 4;
575
576    for (i = 0; i < 16; i++) /* 16 stages */
577    {
578        p1 = ref + offsetRef[i];
579
580        j = 4;/* 4 lines */
581        do
582        {
583            cur_word = *((uint32*)(blk += 4));
584            tmp = p1[12];
585            tmp2 = p1[13];
586            tmp++;
587            tmp2 += tmp;
588            tmp = (cur_word >> 24) & 0xFF;
589            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
590            tmp = p1[8];
591            tmp2 = p1[9];
592            tmp++;
593            tmp2 += tmp;
594            tmp = (cur_word >> 16) & 0xFF;
595            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
596            tmp = p1[4];
597            tmp2 = p1[5];
598            tmp++;
599            tmp2 += tmp;
600            tmp = (cur_word >> 8) & 0xFF;
601            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
602            tmp = p1[0];
603            tmp2 = p1[1];
604            p1 += refwx4;
605            tmp++;
606            tmp2 += tmp;
607            tmp = (cur_word & 0xFF);
608            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
609        }
610        while (--j);
611
612        NUM_SAD_HP_MB();
613
614        sadstar += madstar;
615        if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16))
616        {
617            return 65536;
618        }
619    }
620
621    return sad;
622}
623
624#endif /* HTFM */
625
626
627
628
629
630