fastquant_inline.h revision 59f566c4ec3dfc097ad8163523e522280b27e5c3
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/*********************************************************************************/
19/*  Filename: fastquant_inline.h                                                        */
20/*  Description: Implementation for in-line functions used in dct.cpp           */
21/*  Modified:                                                                   */
22/*********************************************************************************/
23#ifndef _FASTQUANT_INLINE_H_
24#define _FASTQUANT_INLINE_H_
25
26#include "mp4def.h"
27
28#if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) /* ARM GNU COMPILER  */
29
30__inline int32 aan_scale(int32 q_value, int32 coeff, int32 round, int32 QPdiv2)
31{
32    q_value = coeff * q_value + round;
33    coeff = q_value >> 16;
34    if (coeff < 0)  coeff += QPdiv2;
35    else            coeff -= QPdiv2;
36
37    return coeff;
38}
39
40
41__inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
42{
43    int32 q_value;
44
45    q_value = coeff * q_scale;      //q_value = -((-(coeff + QPdiv2)*q_scale)>>LSL);
46    q_value >>= shift;                  //q_value = (((coeff - QPdiv2)*q_scale)>>LSL );
47    q_value += ((UInt)q_value >> 31); /* add one if negative */
48
49    return q_value;
50}
51
52__inline int32  coeff_clip(int32 q_value, int32 ac_clip)
53{
54    int32 coeff = q_value + ac_clip;
55
56    if ((UInt)coeff > (UInt)(ac_clip << 1))
57        q_value = ac_clip ^(q_value >> 31);
58
59    return q_value;
60}
61
62__inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
63{
64    int32 coeff;
65
66    OSCL_UNUSED_ARG(tmp);
67
68    if (q_value < 0)
69    {
70        coeff = q_value * QPx2 - Addition;
71        if (coeff < -2048)
72            coeff = -2048;
73    }
74    else
75    {
76        coeff = q_value * QPx2 + Addition;
77        if (coeff > 2047)
78            coeff = 2047;
79    }
80    return coeff;
81}
82
83__inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
84{
85    q_value = coeff * q_value + round;
86
87    return q_value;
88}
89
90__inline int32 smulbb(int32 q_scale, int32 coeff)
91{
92    int32 q_value;
93
94    q_value = coeff * q_scale;
95
96    return q_value;
97}
98
99__inline int32 aan_dc_scale(int32 coeff, int32 QP)
100{
101
102    if (coeff < 0)  coeff += (QP >> 1);
103    else            coeff -= (QP >> 1);
104
105    return coeff;
106}
107
108__inline int32 clip_2047(int32 q_value, int32 tmp)
109{
110    OSCL_UNUSED_ARG(tmp);
111
112    if (q_value < -2048)
113    {
114        q_value = -2048;
115    }
116    else if (q_value > 2047)
117    {
118        q_value = 2047;
119    }
120
121    return q_value;
122}
123
124__inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
125{
126    int32 coeff;
127
128    OSCL_UNUSED_ARG(tmp);
129
130    coeff = q_value << 1;
131    stepsize *= QP;
132    if (coeff > 0)
133    {
134        q_value = (coeff + 1) * stepsize;
135        q_value >>= 4;
136        if (q_value > 2047) q_value = 2047;
137    }
138    else
139    {
140        q_value = (coeff - 1) * stepsize;
141        q_value += 15;
142        q_value >>= 4;
143        if (q_value < -2048)    q_value = -2048;
144    }
145
146    return q_value;
147}
148
149__inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
150{
151    OSCL_UNUSED_ARG(tmp);
152
153    q_value <<= 1;
154    if (q_value > 0)
155    {
156        q_value >>= 4;
157        if (q_value > 2047) q_value = 2047;
158    }
159    else
160    {
161        q_value += 15;
162        q_value >>= 4;
163        if (q_value < -2048) q_value = -2048;
164    }
165
166    return q_value;
167}
168
169#elif defined(__CC_ARM)  /* only work with arm v5 */
170
171#if defined(__TARGET_ARCH_5TE)
172
/*
 * Fixed-point scale of a coefficient followed by a QPdiv2 offset
 * (ARM compiler inline assembly, ARMv5TE build).
 *
 * SMLABB multiplies the signed bottom halfwords of coeff and q_value and adds
 * round. The sum is shifted right arithmetically by 16 with the flags updated,
 * then QPdiv2 is added when the shifted value is <= 0 and subtracted when it
 * is > 0.
 *
 * NOTE(review): the plain-C aan_scale in this header tests `coeff < 0`, so the
 * two versions differ when the shifted value is exactly zero - confirm which
 * is intended.
 */
__inline int32 aan_scale(int32 q_value, int32 coeff,
                         int32 round, int32 QPdiv2)
{
    __asm
    {
        smlabb q_value, coeff, q_value, round   /* q_value = coeff.lo16 * q_value.lo16 + round */
        movs       coeff, q_value, asr #16      /* coeff = q_value >> 16, flags set from result */
        addle   coeff, coeff, QPdiv2            /* result <= 0: add QPdiv2 */
        subgt   coeff, coeff, QPdiv2            /* result >  0: subtract QPdiv2 */
    }

    return coeff;
}
186
/*
 * Quantise a coefficient (ARM compiler inline assembly, ARMv5TE build).
 *
 * SMULBB multiplies the signed bottom halfwords of q_scale and coeff. The
 * product is shifted right arithmetically by `shift`, and the sign bit of the
 * shifted value is added back, i.e. one is added when the value is negative.
 */
__inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
{
    int32 q_value;

    __asm
    {
        smulbb  q_value, q_scale, coeff    /* q_value = q_scale.lo16 * coeff.lo16 */
        mov     coeff, q_value, asr shift   /* coeff = q_value >> shift (arithmetic) */
        add q_value, coeff, coeff, lsr #31  /* add 1 when the shifted value is negative */
    }


    return q_value;
}
201
/*
 * Reconstruct a coefficient from a quantised level and saturate it
 * (ARM compiler inline assembly, ARMv5TE build).
 *
 * The level is multiplied by QPx2 (signed bottom halfwords). Addition is
 * subtracted for a negative level and added otherwise; the flags for that
 * choice come from the initial CMP. The last four instructions are a
 * branch-free clip: the value is biased by tmp, 3840 + 254 = 4094 is
 * subtracted in two steps, and when the biased value is above 4094 as an
 * unsigned number the result is replaced by tmp XOR (sign mask of coeff).
 *
 * NOTE(review): the constants imply tmp is expected to be 2047 (4094 is
 * 2 * 2047), giving a clip to [-2048, 2047] - confirm against the callers.
 */
__inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
{
    int32 coeff;

    __asm
    {
        cmp     q_value, #0                 /* flags for the sign of the level */
        smulbb  coeff, q_value, QPx2        /* coeff = q_value.lo16 * QPx2.lo16 */
        sublt   coeff, coeff, Addition      /* negative level: subtract Addition */
        addge   coeff, coeff, Addition      /* otherwise: add Addition */
        add     q_value, coeff, tmp         /* bias by tmp for the unsigned range test */
        subs    q_value, q_value, #3840     /* subtract 4094 in two parts: 3840 ... */
        subcss  q_value, q_value, #254      /* ... then 254, only if no borrow so far */
        eorhi   coeff, tmp, coeff, asr #31  /* out of range: tmp or ~tmp by sign */
    }

    return coeff;
}
220
/*
 * Thin wrapper around the ARMv5TE SMLABB instruction: returns the product of
 * the signed bottom halfwords of coeff and q_value, plus round.
 */
__inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
{
    __asm
    {
        smlabb q_value, coeff, q_value, round   /* q_value = coeff.lo16 * q_value.lo16 + round */
    }

    return q_value;
}
230
/*
 * Thin wrapper around the ARMv5TE SMULBB instruction: returns the product of
 * the signed bottom halfwords of q_scale and coeff.
 */
__inline int32 smulbb(int32 q_scale, int32 coeff)
{
    int32 q_value;

    __asm
    {
        smulbb  q_value, q_scale, coeff     /* q_value = q_scale.lo16 * coeff.lo16 */
    }

    return q_value;
}
242
/*
 * Reconstruct a coefficient from a quantised level using a per-coefficient
 * step size, then saturate it (ARM compiler inline assembly, ARMv5TE build).
 *
 * Computes ((2 * q_value + sign) * stepsize * QP) / 16, where sign is +1 for a
 * positive level, -1 for a negative one and 0 for zero. A negative product has
 * 15 added before the arithmetic shift so the division rounds toward zero.
 * The multiplies are SMULBB, so only the signed bottom halfwords of their
 * operands are used. The trailing four instructions clip the result using tmp
 * (biased unsigned compare against 0xf00 + 0xfe = 4094).
 */
__inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
{
    /* tmp must have value of 2047 */
    int32 coeff;
    __asm
    {
        movs    coeff, q_value, lsl #1      /* coeff = 2 * q_value, flags from result */
        smulbb  stepsize, stepsize, QP      /* stepsize = stepsize.lo16 * QP.lo16 */
        addgt   coeff, coeff, #1            /* positive level: 2q + 1 */
        sublt   coeff, coeff, #1            /* negative level: 2q - 1 */
        smulbb  q_value, coeff, stepsize    /* scale by the combined step */
        addlt   q_value, q_value, #15       /* negative: bias so >>4 rounds toward zero */
        mov     q_value, q_value, asr #4    /* divide by 16 */
        add     coeff, q_value, tmp         /* bias by tmp for the unsigned range test */
        subs    coeff, coeff, #0xf00        /* subtract 4094 in two parts ... */
        subcss  coeff, coeff, #0xfe         /* ... second part only if no borrow so far */
        eorhi   q_value, tmp, q_value, asr #31  /* out of range: tmp or ~tmp by sign */
    }

    return q_value;
}
264
265
266#else // not ARMV5TE
267
/*
 * Fixed-point scale of a coefficient followed by a QPdiv2 offset
 * (ARM compiler inline assembly, build without ARMv5TE).
 *
 * MLA computes coeff * q_value + round on the full 32-bit registers. The sum
 * is shifted right arithmetically by 16 with the flags updated, then QPdiv2 is
 * added when the shifted value is <= 0 and subtracted when it is > 0.
 *
 * NOTE(review): the plain-C aan_scale in this header tests `coeff < 0`, so the
 * two versions differ when the shifted value is exactly zero - confirm which
 * is intended.
 */
__inline int32 aan_scale(int32 q_value, int32 coeff,
                         int32 round, int32 QPdiv2)
{
    __asm
    {
        mla q_value, coeff, q_value, round      /* q_value = coeff * q_value + round */
        movs       coeff, q_value, asr #16      /* coeff = q_value >> 16, flags set from result */
        addle   coeff, coeff, QPdiv2            /* result <= 0: add QPdiv2 */
        subgt   coeff, coeff, QPdiv2            /* result >  0: subtract QPdiv2 */
    }

    return coeff;
}
281
/*
 * Quantise a coefficient (ARM compiler inline assembly, build without
 * ARMv5TE).
 *
 * MUL forms q_scale * coeff on the full registers. The product is shifted
 * right arithmetically by `shift`, and the sign bit of the shifted value is
 * added back, i.e. one is added when the value is negative.
 */
__inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
{
    int32 q_value;

    __asm
    {
        mul q_value, q_scale, coeff    /* q_value = q_scale * coeff */
        mov     coeff, q_value, asr shift   /* coeff = q_value >> shift (arithmetic) */
        add q_value, coeff, coeff, lsr #31  /* add 1 when the shifted value is negative */
    }


    return q_value;
}
296
297
/*
 * Reconstruct a coefficient from a quantised level and saturate it
 * (ARM compiler inline assembly, build without ARMv5TE).
 *
 * The level is multiplied by QPx2. Addition is subtracted for a negative level
 * and added otherwise; the flags for that choice come from the initial CMP.
 * The last four instructions are a branch-free clip: the value is biased by
 * tmp, 3840 + 254 = 4094 is subtracted in two steps, and when the biased value
 * is above 4094 as an unsigned number the result is replaced by
 * tmp XOR (sign mask of coeff).
 *
 * NOTE(review): the constants imply tmp is expected to be 2047 (4094 is
 * 2 * 2047), giving a clip to [-2048, 2047] - confirm against the callers.
 */
__inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
{
    int32 coeff;

    __asm
    {
        cmp     q_value, #0                 /* flags for the sign of the level */
        mul coeff, q_value, QPx2            /* coeff = q_value * QPx2 */
        sublt   coeff, coeff, Addition      /* negative level: subtract Addition */
        addge   coeff, coeff, Addition      /* otherwise: add Addition */
        add     q_value, coeff, tmp         /* bias by tmp for the unsigned range test */
        subs    q_value, q_value, #3840     /* subtract 4094 in two parts: 3840 ... */
        subcss  q_value, q_value, #254      /* ... then 254, only if no borrow so far */
        eorhi   coeff, tmp, coeff, asr #31  /* out of range: tmp or ~tmp by sign */
    }

    return coeff;
}
316
/*
 * Multiply-accumulate for builds without ARMv5TE: uses MLA to return
 * coeff * q_value + round, computed on the full 32-bit registers.
 */
__inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
{
    __asm
    {
        mla q_value, coeff, q_value, round  /* q_value = coeff * q_value + round */
    }

    return q_value;
}
326
/*
 * Multiply for builds without ARMv5TE: uses MUL to return q_scale * coeff,
 * computed on the full 32-bit registers.
 */
__inline int32 smulbb(int32 q_scale, int32 coeff)
{
    int32 q_value;

    __asm
    {
        mul q_value, q_scale, coeff     /* q_value = q_scale * coeff */
    }

    return q_value;
}
338
339
/*
 * Reconstruct a coefficient from a quantised level using a per-coefficient
 * step size, then saturate it (ARM compiler inline assembly, build without
 * ARMv5TE).
 *
 * Computes ((2 * q_value + sign) * stepsize * QP) / 16, where sign is +1 for a
 * positive level, -1 for a negative one and 0 for zero. A negative product has
 * 15 added before the arithmetic shift so the division rounds toward zero.
 * The trailing four instructions clip the result using tmp (biased unsigned
 * compare against 0xf00 + 0xfe = 4094).
 */
__inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
{
    /* tmp must have value of 2047 */
    int32 coeff;
    __asm
    {
        movs    coeff, q_value, lsl #1      /* coeff = 2 * q_value, flags from result */
        mul  stepsize, stepsize, QP         /* stepsize = stepsize * QP */
        addgt   coeff, coeff, #1            /* positive level: 2q + 1 */
        sublt   coeff, coeff, #1            /* negative level: 2q - 1 */
        mul q_value, coeff, stepsize        /* scale by the combined step */
        addlt   q_value, q_value, #15       /* negative: bias so >>4 rounds toward zero */
        mov     q_value, q_value, asr #4    /* divide by 16 */
        add     coeff, q_value, tmp         /* bias by tmp for the unsigned range test */
        subs    coeff, coeff, #0xf00        /* subtract 4094 in two parts ... */
        subcss  coeff, coeff, #0xfe         /* ... second part only if no borrow so far */
        eorhi   q_value, tmp, q_value, asr #31  /* out of range: tmp or ~tmp by sign */
    }

    return q_value;
}
361
362
363#endif
364
/*
 * Clip q_value against ac_clip without branching (ARM compiler inline
 * assembly).
 *
 * q_value is biased by ac_clip and 2 * ac_clip is subtracted with the flags
 * updated. When the biased value is above 2 * ac_clip as an unsigned number
 * (condition HI) q_value is replaced by ac_clip XOR (q_value >> 31): ac_clip
 * for a non-negative input and the bitwise complement of ac_clip for a
 * negative one. Otherwise q_value is returned unchanged.
 */
__inline int32  coeff_clip(int32 q_value, int32 ac_clip)
{
    int32 coeff;

    __asm
    {
        add     coeff, q_value, ac_clip             /* bias into the unsigned test range */
        subs    coeff, coeff, ac_clip, lsl #1       /* compare against 2 * ac_clip */
        eorhi   q_value, ac_clip, q_value, asr #31  /* out of range: ac_clip or ~ac_clip */
    }

    return q_value;
}
378
/*
 * Offset a coefficient by half of QP (ARM compiler inline assembly).
 *
 * QP >> 1 (arithmetic shift) is added when coeff <= 0 and subtracted when
 * coeff > 0.
 *
 * NOTE(review): the plain-C aan_dc_scale in this header tests `coeff < 0`, so
 * the two versions differ for a coeff of exactly zero - confirm which is
 * intended.
 */
__inline int32 aan_dc_scale(int32 coeff, int32 QP)
{

    __asm
    {
        cmp   coeff, #0                     /* flags for the sign of coeff */
        addle   coeff, coeff, QP, asr #1    /* coeff <= 0: add QP / 2 */
        subgt   coeff, coeff, QP, asr #1    /* coeff >  0: subtract QP / 2 */
    }

    return coeff;
}
391
/*
 * Saturate q_value without branching (ARM compiler inline assembly).
 *
 * q_value is biased by tmp and 0xf00 + 0xfe = 4094 is subtracted in two steps
 * with the flags updated. When the biased value is above 4094 as an unsigned
 * number (condition HI) q_value is replaced by tmp XOR (q_value >> 31). With
 * tmp = 2047 that yields 2047 for a positive overflow and -2048 for a
 * negative one.
 */
__inline int32 clip_2047(int32 q_value, int32 tmp)
{
    /* tmp must have value of 2047 */
    int32 coeff;

    __asm
    {
        add     coeff, q_value, tmp             /* bias by tmp for the unsigned range test */
        subs    coeff, coeff, #0xf00            /* subtract 4094 in two parts ... */
        subcss  coeff, coeff, #0xfe             /* ... second part only if no borrow so far */
        eorhi   q_value, tmp, q_value, asr #31  /* out of range: tmp or ~tmp by sign */
    }

    return q_value;
}
407
/*
 * Final scaling and saturation step for an intra coefficient (ARM compiler
 * inline assembly).
 *
 * q_value is doubled with the flags updated. A negative value has 15 added so
 * that the following arithmetic shift right by 4 rounds toward zero. The
 * result is then clipped with the biased unsigned compare against
 * 0xf00 + 0xfe = 4094, using tmp as both the bias and the saturation value.
 *
 * NOTE(review): the clip constants imply tmp is expected to be 2047, as the
 * sibling functions state - confirm against the callers.
 */
__inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
{
    int32 coeff;

    __asm
    {
        movs    q_value, q_value, lsl #1        /* q_value *= 2, flags from result */
        addlt   q_value, q_value, #15           /* negative: bias so >>4 rounds toward zero */
        mov     q_value, q_value, asr #4        /* divide by 16 */
        add     coeff, q_value, tmp             /* bias by tmp for the unsigned range test */
        subs    coeff, coeff, #0xf00            /* subtract 4094 in two parts ... */
        subcss  coeff, coeff, #0xfe             /* ... second part only if no borrow so far */
        eorhi   q_value, tmp, q_value, asr #31  /* out of range: tmp or ~tmp by sign */
    }

    return q_value;
}
425
426#elif ( defined(PV_ARM_GCC_V4) || defined(PV_ARM_GCC_V5) ) /* ARM GNU COMPILER  */
427
428__inline int32 aan_scale(int32 q_value, int32 coeff,
429                         int32 round, int32 QPdiv2)
430{
431    register int32 out;
432    register int32 qv = q_value;
433    register int32 cf = coeff;
434    register int32 rr = round;
435    register int32 qp = QPdiv2;
436
437    asm volatile("smlabb %0, %2, %1, %3\n\t"
438                 "movs %0, %0, asr #16\n\t"
439                 "addle %0, %0, %4\n\t"
440                 "subgt %0, %0, %4"
441             : "=&r"(out)
442                         : "r"(qv),
443                         "r"(cf),
444                         "r"(rr),
445                         "r"(qp));
446    return out;
447}
448
449__inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
450{
451    register int32 out;
452    register int32 temp1;
453    register int32 cc = coeff;
454    register int32 qs = q_scale;
455    register int32 ss = shift;
456
457    asm volatile("smulbb %0, %3, %2\n\t"
458                 "mov %1, %0, asr %4\n\t"
459                 "add %0, %1, %1, lsr #31"
460             : "=&r"(out),
461                 "=&r"(temp1)
462                         : "r"(cc),
463                         "r"(qs),
464                         "r"(ss));
465
466    return out;
467}
468
469__inline int32 coeff_clip(int32 q_value, int32 ac_clip)
470{
471    register int32 coeff;
472
473    asm volatile("add   %1, %0, %2\n\t"
474                 "subs  %1, %1, %2, lsl #1\n\t"
475                 "eorhi %0, %2, %0, asr #31"
476             : "+r"(q_value),
477                 "=&r"(coeff)
478                         : "r"(ac_clip));
479
480    return q_value;
481}
482
483__inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
484{
485    register int32 out;
486    register int32 temp1;
487    register int32 qv = q_value;
488    register int32 qp = QPx2;
489    register int32 aa = Addition;
490    register int32 tt = tmp;
491
492    asm volatile("cmp    %2, #0\n\t"
493                 "mul    %0, %2, %3\n\t"
494                 "sublt  %0, %0, %4\n\t"
495                 "addge  %0, %0, %4\n\t"
496                 "add    %1, %0, %5\n\t"
497                 "subs   %1, %1, #3840\n\t"
498                 "subcss %1, %1, #254\n\t"
499                 "eorhi  %0, %5, %0, asr #31"
500             : "=&r"(out),
501                 "=&r"(temp1)
502                         : "r"(qv),
503                         "r"(qp),
504                         "r"(aa),
505                         "r"(tt));
506
507    return out;
508}
509
510__inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
511{
512    register int32 out;
513    register int32 aa = (int32)q_value;
514    register int32 bb = (int32)coeff;
515    register int32 cc = (int32)round;
516
517    asm volatile("smlabb %0, %1, %2, %3"
518             : "=&r"(out)
519                         : "r"(aa),
520                         "r"(bb),
521                         "r"(cc));
522    return out;
523}
524
525__inline int32 smulbb(int32 q_scale, int32 coeff)
526{
527    register int32 out;
528    register int32 aa = (int32)q_scale;
529    register int32 bb = (int32)coeff;
530
531    asm volatile("smulbb %0, %1, %2"
532             : "=&r"(out)
533                         : "r"(aa),
534                         "r"(bb));
535    return out;
536}
537
538__inline int32 aan_dc_scale(int32 coeff, int32 QP)
539{
540    register int32 out;
541    register int32 cc = coeff;
542    register int32 qp = QP;
543
544    asm volatile("cmp %1, #0\n\t"
545                 "addle %0, %1, %2, asr #1\n\t"
546                 "subgt %0, %1, %2, asr #1"
547             : "=&r"(out)
548                         : "r"(cc),
549                         "r"(qp));
550    return out;
551}
552
553__inline int32 clip_2047(int32 q_value, int32 tmp)
554{
555    register int32 coeff;
556    asm volatile("add    %1, %0, %2\n\t"
557                 "subs   %1, %1, #0xF00\n\t"
558                 "subcss %1, %1, #0xFE\n\t"
559                 "eorhi  %0, %2, %0, asr #31"
560             : "+r"(q_value),
561                 "=&r"(coeff)
562                         : "r"(tmp));
563
564    return q_value;
565}
566
567__inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
568{
569    register int32 out;
570    register int32 temp1;
571    register int32 qv = q_value;
572    register int32 ss = stepsize;
573    register int32 qp = QP;
574    register int32 tt = tmp;
575
576    asm volatile("movs    %1, %2, lsl #1\n\t"
577                 "mul     %0, %3, %4\n\t"
578                 "addgt   %1, %1, #1\n\t"
579                 "sublt   %1, %1, #1\n\t"
580                 "mul     %0, %1, %0\n\t"
581                 "addlt   %0, %0, #15\n\t"
582                 "mov     %0, %0, asr #4\n\t"
583                 "add     %1, %0, %5\n\t"
584                 "subs    %1, %1, #0xF00\n\t"
585                 "subcss  %1, %1, #0xFE\n\t"
586                 "eorhi   %0, %5, %0, asr #31"
587             : "=&r"(out),
588                 "=&r"(temp1)
589                         : "r"(qv),
590                         "r"(ss),
591                         "r"(qp),
592                         "r"(tt));
593
594    return out;
595
596}
597
598__inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
599{
600    register int32 out;
601    register int32 temp1;
602    register int32 qv = q_value;
603    register int32 tt = tmp;
604
605    asm volatile("movs    %1, %2, lsl #1\n\t"
606                 "addlt   %1, %1, #15\n\t"
607                 "mov     %0, %1, asr #4\n\t"
608                 "add     %1, %0, %3\n\t"
609                 "subs    %1, %1, #0xF00\n\t"
610                 "subcss  %1, %1, #0xFE\n\t"
611                 "eorhi   %0, %3, %0, asr #31"
612             : "=&r"(out),
613                 "=&r"(temp1)
614                         : "r"(qv),
615                         "r"(tt));
616    return out;
617}
618
619
620#endif // Platform
621
622
623#endif //_FASTQUANT_INLINE_H_
624
625
626