omxVCM4P2_QuantInvIntra_I_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
11320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;//
21320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// Copyright (C) 2007-2008 ARM Limited
31320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;//
41320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// Licensed under the Apache License, Version 2.0 (the "License");
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// you may not use this file except in compliance with the License.
61320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// You may obtain a copy of the License at
71320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;//
81320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;//      http://www.apache.org/licenses/LICENSE-2.0
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;//
101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// Unless required by applicable law or agreed to in writing, software
111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// distributed under the License is distributed on an "AS IS" BASIS,
121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// See the License for the specific language governing permissions and
141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;// limitations under the License.
151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;//
161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci;/**
171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * File Name:  omxVCM4P2_QuantInvIntra_I_s.s
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * OpenMAX DL: v1.0.2
201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * Revision:   12290
211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * Date:       Wednesday, April 9, 2008
221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * Description:
; * Contains the module for inverse quantization of intra-coded blocks
281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * Function: omxVCM4P2_QuantInvIntra_I
351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; *
361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci; * Description:
37; * Performs inverse quantization on intra/inter coded block.
38; * This function supports bits_per_pixel = 8. Mismatch control
39; * is performed for the first MPEG-4 mode inverse quantization method.
40; * The output coefficients are clipped to the range: [-2048, 2047].
41; * Mismatch control is performed for the first inverse quantization method.
42; *
43; * Remarks:
44; *
45; * Parameters:
46; * [in]    pSrcDst        pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
47; * [in]    QP            quantization parameter (quantiser_scale)
48; * [in]    videoComp          (Intra version only.) Video component type of the
49; *                    current block. Takes one of the following flags:
50; *                    OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
51; *                    OMX_VC_ALPHA.
52; * [in]    shortVideoHeader  a flag indicating presence of short_video_header;
53; *                           shortVideoHeader==1 selects linear intra DC mode,
54; *                    and shortVideoHeader==0 selects nonlinear intra DC mode.
55; * [out]    pSrcDst        pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
56; *
57; * Return Value:
58; * OMX_Sts_NoErr - no error
59; * OMX_Sts_BadArgErr - bad arguments
60; *    -    If pSrcDst is NULL or is not 16-byte aligned.
61; *      or
62; *    - If QP <= 0.
63; *      or
64; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
65; *
66
67
68   INCLUDE omxtypes_s.h
69   INCLUDE armCOMM_s.h
70
71   M_VARIANTS CortexA8
72
73
74   IMPORT        armVCM4P2_DCScaler
75
76     IF CortexA8
77
78
;//-----------------------------------------------------------------------
;// Register aliases used by omxVCM4P2_QuantInvIntra_I.
;// Grouped by physical register so that deliberate sharing is visible.
;//-----------------------------------------------------------------------

;// ARM core registers -- incoming arguments (r0-r3)
pSrcDst            RN 0                 ;// in/out: pointer to the coefficient block
QP                 RN 1                 ;// in: quantization parameter
videoComp          RN 2                 ;// in: video component selector
shortVideoHeader   RN 3                 ;// in: short_video_header flag

;// ARM core registers -- result
Return             RN 0                 ;// status code; reuses r0 (pSrcDst) at exit

;// ARM core registers -- scratch
;// r4 and r5 each carry two names: the DC-stage value is no longer
;// needed by the time the AC-stage name is first written.
dcScaler           RN 4                 ;// DC stage: DC scaler value
doubleQP           RN 4                 ;// AC stage: 2*QP
index              RN 5                 ;// DC stage: address into the DC scaler table
Count              RN 5                 ;// AC stage: coefficients left to process
temp               RN 14                ;// dequantized DC coefficient (lives in r14/lr)

;// NEON registers -- Q0 (D0:D1)
qQP1               QN Q0.S32            ;// QP1 replicated as 32-bit lanes
dQP10              DN D0.S32[0]         ;// lane 0 of D0, receives QP1 from the core reg
dQP1               DN D0.S16            ;// QP1 as 16-bit lanes
dMinusQP1          DN D1.S16            ;// -QP1 as 16-bit lanes

;// NEON registers -- input coefficients (D2, D3)
dCoeff0            DN D2.S16            ;// coefficients 0..3 of the current group
dCoeff1            DN D3.S16            ;// coefficients 4..7 of the current group

;// NEON registers -- zero masks and zero constant (D4, D5, D11)
dZero1             DN D4.S16            ;// mask of dCoeff1 lanes equal to zero
dConst0            DN D5.S16            ;// all-zero vector
dZero0             DN D11.S16           ;// mask of dCoeff0 lanes equal to zero

;// NEON registers -- Q3 (D6:D7): first half of the group
qSign0             QN Q3.S32            ;// +/-QP1 widened to 32 bits
qResult0           QN Q3.S32            ;// same register: accumulator for the result
dSign0             DN D6.S16            ;// sign mask, then +/-QP1 (16-bit)
dResult0           DN D7.S16            ;// narrowed result (D7, adjacent to D8 for the store)

;// NEON registers -- Q4 (D8:D9): second half of the group
qSign1             QN Q4.S32            ;// +/-QP1 widened to 32 bits
qResult1           QN Q4.S32            ;// same register: accumulator for the result
dSign1             DN D8.S16            ;// sign mask, then +/-QP1 (16-bit)
dResult1           DN D8.S16            ;// narrowed result

;// NEON registers -- Q5 (D10:D11): 2*QP
q2QP0              QN Q5.S32            ;// 2*QP replicated as 32-bit lanes
d2QP0              DN D10.S32[0]        ;// lane 0 of D10, receives 2*QP from the core reg
d2QP               DN D10.S16           ;// 2*QP as 16-bit lanes
127
128
129
130
131
132
;//-----------------------------------------------------------------------
;// omxVCM4P2_QuantInvIntra_I
;//
;// In-place inverse quantization of one block of 64 16-bit coefficients.
;//
;// In:    r0 = pSrcDst           pointer to the 64-coefficient block
;//        r1 = QP                quantization parameter
;//        r2 = videoComp         selects a 32-byte row of armVCM4P2_DCScaler
;//        r3 = shortVideoHeader  non-zero: DC scaler is fixed at 8
;// Out:   r0 = OMX_Sts_NoErr (always; this routine performs no argument
;//             validation itself)
;//
;// DC:    coeff[0] = SSAT12( coeff[0] * dcScaler )
;// AC:    QP1      = QP if QP is odd, QP-1 if QP is even
;//        coeff[i] = 0                                     if coeff[i] == 0
;//                 = clip( 2*QP*coeff[i] + QP1 )           if coeff[i] >  0
;//                 = clip( 2*QP*coeff[i] - QP1 )           if coeff[i] <  0
;//        with clip() saturating to [-2048, 2047].
;//
;// Uses:  r4, r5, r14, D0-D11, flags.
;//        NOTE(review): M_START/M_END come from armCOMM_s.h; the "r5,d11"
;//        arguments presumably make them save/restore r4-r5, d8-d11 and lr
;//        -- confirm against the macro definition.
;//-----------------------------------------------------------------------
        M_START omxVCM4P2_QuantInvIntra_I,r5,d11


        ;// ---- DC coefficient ------------------------------------------

        TEQ       shortVideoHeader,#0               ;// short video header present?
        MOVNE     dcScaler,#8                       ;// yes: dcScaler = 8
        BNE       calDCVal

        ;// No short header: dcScaler = armVCM4P2_DCScaler[videoComp*32 + QP]
        ;// (byte table, 32 entries per component; assumes QP < 32 -- TODO confirm)
        LDR       index, =armVCM4P2_DCScaler
        ADD       index,index,videoComp,LSL #5
        LDRB      dcScaler,[index,QP]

calDCVal

        LDRH      temp,[pSrcDst]                    ;// quantized DC coefficient
        SMULBB    temp,temp,dcScaler                ;// signed 16x16: DC * dcScaler
        SSAT      temp,#12,temp                     ;// saturate to 12 bits; stored after the AC loop


        ;// ---- AC coefficients: loop-invariant setup --------------------

        ADD       doubleQP,QP,QP                    ;// doubleQP = 2*QP (r4: dcScaler is dead now)
        VMOV      d2QP0,doubleQP
        VDUP      q2QP0,d2QP0                       ;// 2*QP in every 32-bit lane
        TST       QP,#1
        SUBEQ     QP,QP,#1                          ;// QP1 = QP-1 if QP is even, else QP
        VMOV      dQP10,QP
        MOV       Count,#64                         ;// 64 coefficients per block
        VDUP      qQP1,dQP10                        ;// QP1 in every 32-bit lane
        VSHRN     d2QP,q2QP0,#0                     ;// narrow 2*QP to 16-bit lanes (zero shift)
        VEOR      dConst0,dConst0,dConst0           ;// dConst0 = 0
        VSHRN     dQP1,qQP1,#0                      ;// narrow QP1 to 16-bit lanes (zero shift)
        VSUB      dMinusQP1,dConst0,dQP1            ;// dMinusQP1 = -QP1


        ;// ---- AC coefficients: 8 coefficients per iteration ------------
        ;// Invariant: Count = coefficients still to process (> 0 here),
        ;//            pSrcDst points at the next unprocessed group.
        ;// The load sits at the top of the loop so that the final iteration
        ;// never reads beyond the 128-byte block.

Loop

        VLD1      {dCoeff0,dCoeff1},[pSrcDst]       ;// load 8 coefficients

        VCLT      dSign0,dCoeff0,#0                 ;// mask: coeff < 0
        VCLT      dSign1,dCoeff1,#0
        VCEQ      dZero0,dCoeff0,#0                 ;// mask: coeff == 0
        VBSL      dSign0,dMinusQP1,dQP1             ;// dSign0 = (coeff < 0) ? -QP1 : QP1
        VCEQ      dZero1,dCoeff1,#0
        VBSL      dSign1,dMinusQP1,dQP1             ;// dSign1 = (coeff < 0) ? -QP1 : QP1
        VMOVL     qSign0,dSign0                     ;// widen +/-QP1 to 32 bits (seeds the accumulator)
        VMOVL     qSign1,dSign1
        VMLAL     qResult0,dCoeff0,d2QP             ;// qResult0 = +/-QP1 + coeff * 2*QP
        VMLAL     qResult1,dCoeff1,d2QP             ;// qResult1 = +/-QP1 + coeff * 2*QP

        ;// Clip to [-2048,2047]: a saturating left shift by 20 clamps any
        ;// value that does not fit in 12 signed bits; the arithmetic right
        ;// shifts (4 + 16 = 20) bring the value back down.
        VQSHL     qResult0,qResult0,#20
        VQSHL     qResult1,qResult1,#20
        VSHR      qResult0,qResult0,#4
        VSHR      qResult1,qResult1,#4
        VSHRN     dResult0,qResult0,#16             ;// narrow the clipped value to halfwords
        VSHRN     dResult1,qResult1,#16

        VBIT      dResult0,dConst0,dZero0           ;// coefficients that were zero stay zero
        VBIT      dResult1,dConst0,dZero1

        VST1      {dResult0,dResult1},[pSrcDst]!    ;// store 8 results, advance 16 bytes
        SUBS      Count,Count,#8
        BGT       Loop


        ;// ---- Store the DC result --------------------------------------
        ;// Element 0 also went through the AC path above; overwrite it
        ;// with the DC value computed at calDCVal.

        SUB       pSrcDst,pSrcDst,#128              ;// rewind to the start of the block (64 * 2 bytes)
        STRH      temp,[pSrcDst]

        MOV       Return,#OMX_Sts_NoErr

        M_END
220         ENDIF
221
222
223        END
224
225