1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;/**
17; *
18; * File Name:  omxVCM4P2_QuantInvIntra_I_s.s
19; * OpenMAX DL: v1.0.2
20; * Revision:   12290
21; * Date:       Wednesday, April 9, 2008
22; *
23; *
24; *
25; *
26; * Description:
; * Contains the inverse quantization module for intra coded blocks
28; *
29; *
30; *
31; *
32; *
33; *
34; * Function: omxVCM4P2_QuantInvIntra_I
35; *
36; * Description:
; * Performs in-place inverse quantization on an intra coded block.
; * This function supports bits_per_pixel = 8. Mismatch control
; * is performed for the first MPEG-4 mode inverse quantization method.
; * The output coefficients are clipped to the range: [-2048, 2047].
42; *
43; * Remarks:
44; *
45; * Parameters:
46; * [in]    pSrcDst        pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
47; * [in]    QP            quantization parameter (quantiser_scale)
48; * [in]    videoComp          (Intra version only.) Video component type of the
49; *                    current block. Takes one of the following flags:
50; *                    OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
51; *                    OMX_VC_ALPHA.
52; * [in]    shortVideoHeader  a flag indicating presence of short_video_header;
53; *                           shortVideoHeader==1 selects linear intra DC mode,
54; *                    and shortVideoHeader==0 selects nonlinear intra DC mode.
55; * [out]    pSrcDst        pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
56; *
57; * Return Value:
58; * OMX_Sts_NoErr - no error
59; * OMX_Sts_BadArgErr - bad arguments
60; *    -    If pSrcDst is NULL or is not 16-byte aligned.
61; *      or
62; *    - If QP <= 0.
63; *      or
64; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
65; *
66
67
68   INCLUDE omxtypes_s.h
69   INCLUDE armCOMM_s.h
70
71   M_VARIANTS CortexA8
72
73
74   IMPORT        armVCM4P2_DCScaler
75
76     IF CortexA8
77
78
;//--------------------------------------------------------------------
;// Register aliases used by omxVCM4P2_QuantInvIntra_I.
;//
;// Several aliases name the same physical register. They are grouped
;// by register below so that every overlap is visible in one place.
;//--------------------------------------------------------------------

;// ---- Core registers ------------------------------------------------

;// r0-r3: incoming arguments; r0 also carries the return status
pSrcDst            RN 0
Return             RN 0
QP                 RN 1
videoComp          RN 2
shortVideoHeader   RN 3

;// r4: DC scaler during the DC step, then 2*QP for the AC step
dcScaler           RN 4
doubleQP           RN 4

;// r5: DC scaler table address during the DC step, then the loop counter
index              RN 5
Count              RN 5

;// r14 (lr): holds the dequantized DC value across the AC loop
temp               RN 14


;// ---- NEON registers ------------------------------------------------

;// Q0 = D1:D0   QP1 staging (32-bit), then D0 = +QP1 and D1 = -QP1 as 16-bit lanes
dQP10              DN D0.S32[0]
qQP1               QN Q0.S32
dQP1               DN D0.S16
dMinusQP1          DN D1.S16

;// D2, D3       the eight coefficients of the current iteration
dCoeff0            DN D2.S16
dCoeff1            DN D3.S16

;// D4, D11      "coefficient == 0" masks;  D5 = constant zero
dZero1             DN D4.S16
dZero0             DN D11.S16
dConst0            DN D5.S16

;// Q3 = D7:D6   sign term / accumulator for coefficients 0..3
;//              (mask and +/-QP1 built in D6, narrowed result lands in D7)
qSign0             QN Q3.S32
qResult0           QN Q3.S32
dSign0             DN D6.S16
dResult0           DN D7.S16

;// Q4 = D9:D8   sign term / accumulator for coefficients 4..7
;//              (mask, +/-QP1 and narrowed result all use D8)
qSign1             QN Q4.S32
qResult1           QN Q4.S32
dSign1             DN D8.S16
dResult1           DN D8.S16

;// Q5 = D11:D10 2*QP staging (32-bit), then D10 = 2*QP as 16-bit lanes
;//              (D11 is reused as dZero0 once D10 has been narrowed)
d2QP0              DN D10.S32[0]
q2QP0              QN Q5.S32
d2QP               DN D10.S16
127
128
129
130
131
132
;//--------------------------------------------------------------------
;// omxVCM4P2_QuantInvIntra_I
;//
;// In-place inverse quantization of one block of 64 16-bit coefficients
;// (128 bytes).
;//
;// In:   r0 = pSrcDst           block pointer, read and written in place
;//       r1 = QP                quantization parameter
;//       r2 = videoComp         row selector into armVCM4P2_DCScaler
;//       r3 = shortVideoHeader  non-zero selects the fixed DC scaler of 8
;// Out:  r0 = OMX_Sts_NoErr     (no argument validation is done here)
;//
;// DC:   coeff[0] = SSAT12(coeff[0] * dcScaler)
;// AC:   coeff[i] = 0                                          if coeff[i] == 0
;//       coeff[i] = clip12(2*QP*coeff[i] + sign(coeff[i])*QP1) otherwise
;//       where QP1 = QP when QP is odd and QP-1 when QP is even.
;//
;// NOTE(review): M_START/M_END are macros from armCOMM_s.h; the r5,d11
;// arguments presumably request save/restore of r4-r5 and d8-d11 -
;// confirm against the macro definition.
;//--------------------------------------------------------------------
        M_START omxVCM4P2_QuantInvIntra_I,r5,d11


        ;// ---- DC coefficient ------------------------------------------

        TEQ       shortVideoHeader,#0
        MOVNE     dcScaler,#8                      ;// shortVideoHeader != 0: fixed scaler of 8
        BNE       calDCVal

        ;// dcScaler = byte at armVCM4P2_DCScaler + videoComp*32 + QP
        LDR       index,=armVCM4P2_DCScaler
        ADD       index,index,videoComp,LSL #5
        LDRB      dcScaler,[index,QP]

calDCVal

        LDRH      temp,[pSrcDst]                   ;// quantized DC; SMULBB treats the low halfword as signed
        SMULBB    temp,temp,dcScaler               ;// dcCoeff = dcScaler * quantized DC
        SSAT      temp,#12,temp                    ;// clip to [-2048,2047]; kept in temp until the loop is done


        ;// ---- AC coefficients: constants ------------------------------

        ADD       doubleQP,QP,QP                   ;// doubleQP = 2*QP (r4 is free, dcScaler is no longer needed)
        VMOV      d2QP0,doubleQP
        VDUP      q2QP0,d2QP0                      ;// 2*QP in every 32-bit lane
        TST       QP,#1
        SUBEQ     QP,QP,#1                         ;// QP1 = QP-1 if QP is even, else QP
        VMOV      dQP10,QP
        MOV       Count,#64                        ;// coefficients still to process
        VDUP      qQP1,dQP10                       ;// QP1 in every 32-bit lane
        VSHRN     d2QP,q2QP0,#0                    ;// 2*QP narrowed to 16-bit lanes
        VEOR      dConst0,dConst0,dConst0          ;// dConst0 = 0
        VSHRN     dQP1,qQP1,#0                     ;// QP1 narrowed to 16-bit lanes
        VSUB      dMinusQP1,dConst0,dQP1           ;// dMinusQP1 = -QP1


        ;// ---- AC coefficients: 8 per iteration ------------------------
        ;// Lane 0 of the first iteration is the DC position; whatever is
        ;// written there is replaced by temp after the loop.

Loop

        ;// Load at the top of the loop so that the last iteration does
        ;// not read beyond the end of the 128-byte block.
        VLD1      {dCoeff0,dCoeff1},[pSrcDst]

        VCLT      dSign0,dCoeff0,#0                ;// mask: coefficient < 0
        VCLT      dSign1,dCoeff1,#0
        VCEQ      dZero0,dCoeff0,#0                ;// mask: coefficient == 0
        VBSL      dSign0,dMinusQP1,dQP1            ;// dSign0 = -QP1 where coefficient < 0, else +QP1
        VCEQ      dZero1,dCoeff1,#0
        VBSL      dSign1,dMinusQP1,dQP1            ;// dSign1 = -QP1 where coefficient < 0, else +QP1
        VMOVL     qSign0,dSign0                    ;// widen +/-QP1 to 32 bits; this seeds the accumulator
        VMOVL     qSign1,dSign1
        VMLAL     qResult0,dCoeff0,d2QP            ;// qResult0 = (+/-QP1) + coefficient * 2*QP
        VMLAL     qResult1,dCoeff1,d2QP            ;// qResult1 = (+/-QP1) + coefficient * 2*QP

        ;// Clip to [-2048,2047]: saturate at bit 31 after <<20, then undo the shift
        VQSHL     qResult0,qResult0,#20
        VQSHL     qResult1,qResult1,#20
        VSHR      qResult0,qResult0,#4
        VSHR      qResult1,qResult1,#4
        VSHRN     dResult0,qResult0,#16            ;// narrow the clipped value to a halfword
        VSHRN     dResult1,qResult1,#16

        VBIT      dResult0,dConst0,dZero0          ;// force 0 where the input coefficient was 0
        VBIT      dResult1,dConst0,dZero1

        VST1      {dResult0,dResult1},[pSrcDst]!   ;// store 8 results, advance by 16 bytes
        SUBS      Count,Count,#8
        BGT       Loop

        SUB       pSrcDst,pSrcDst,#128             ;// rewind to the start of the block

        ;// Store the inverse quantized DC coefficient
        STRH      temp,[pSrcDst]

        MOV       Return,#OMX_Sts_NoErr

        M_END
220         ENDIF
221
222
223        END
224
225