omxVCM4P2_PredictReconCoefIntra_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
1; **********
2; *
3; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
4; * OpenMAX DL: v1.0.2
5; * Revision:   12290
6; * Date:       Wednesday, April 9, 2008
7; *
8; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9; *
10; *
11; *
12; * Description:
13; * Contains module for DC/AC coefficient prediction
14; *
15; *
16; * Function: omxVCM4P2_PredictReconCoefIntra
17; *
18; * Description:
19; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
20; * to the function call, prediction direction (predDir) should be selected
21; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
22; *
23; * Remarks:
24; *
25; * Parameters:
26; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
27; *                    quantized coefficient residuals (PQF) of the current
28; *                    block; must be aligned on a 4-byte boundary. The
29; *                    output coefficients are saturated to the range
30; *                    [-2048, 2047].
31; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
32; *                    on a 4-byte boundary.
33; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
34; *                    aligned on a 4-byte boundary.
35; * [in]  curQP        quantization parameter of the current block. curQP may
36; *                    be equal to predQP, especially when the current block
37; *                    and the predictor block are in the same macroblock.
38; * [in]  predQP       quantization parameter of the predictor block
39; * [in]  predDir      indicates the prediction direction which takes one
40; *                    of the following values:
41; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
42; *                    OMX_VIDEO_VERTICAL        predict vertically
43; * [in]  ACPredFlag   a flag indicating if AC prediction should be
44; *                    performed. It is equal to ac_pred_flag in the bit
45; *                    stream syntax of MPEG-4
46; * [in]  videoComp    video component type (luminance, chrominance or
47; *                    alpha) of the current block
48; * [out] pSrcDst      pointer to the coefficient buffer which contains
49; *                    the quantized coefficients (QF) of the current
50; *                    block
51; * [out] pPredBufRow  pointer to the updated coefficient row buffer
52; * [out] pPredBufCol  pointer to the updated coefficient column buffer
53; * Return Value:
54; * OMX_Sts_NoErr - no error
55; * OMX_Sts_BadArgErr - Bad arguments
56; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
57; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
58; *   predQP > 31, predDir exceeds [1,2].
59; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
60; *   4-byte aligned.
61; *
62; *********
63
64        INCLUDE omxtypes_s.h
65        INCLUDE armCOMM_s.h
66
67       M_VARIANTS CortexA8
68
69
70
71       IMPORT        armVCM4P2_Reciprocal_QP_S32
72       IMPORT        armVCM4P2_Reciprocal_QP_S16
73       IMPORT        armVCM4P2_DCScaler
74
75        IF CortexA8
76;// Input Arguments
77
78pSrcDst          RN 0
79pPredBufRow      RN 1
80pPredBufCol      RN 2
81curQP            RN 3
82QP               RN 3
83predQP           RN 4
84predDir          RN 5
85ACPredFlag       RN 6
86videoComp        RN 7
87
88;// Local Variables
89
90shortVideoHeader RN 4
91dcScaler         RN 4
92index            RN 6
93predCoeffTable   RN 7
94temp1            RN 6
95temp2            RN 9
96temp             RN 14
97Const            RN 8
98temppPredColBuf  RN 8
99tempPred         RN 9
100
101absCoeffDC       RN 8
102negdcScaler      RN 10
103Rem              RN 11
104temp3            RN 12
105
106dcRowbufCoeff    RN 10
107dcColBuffCoeff   RN 11
108Return           RN 0
109
110;//NEON Registers
111
112qPredRowBuf       QN Q0.S16
113dPredRowBuf0      DN D0.S16
114dPredRowBuf1      DN D1.S16
115
116
117
118
119qCoeffTab         QN Q1.S32
120
121qPredQP           QN Q2.S16
122dPredQP0          DN D4.S16
123dPredQP1          DN D5.S16
124
125
126qtemp1            QN Q3.S32
127qtemp             QN Q3.S16
128
129dtemp0            DN D6.S16
130dtemp1            DN D7.S16
131
132dtemp2            DN D8.S16
133dtemp3            DN D9.S16
134
135dtemp4            DN D2.S16
136dtemp5            DN D3.S16
137dtemp6            DN D4.S16
138dtemp7            DN D5.S16
139
140qtempPred1        QN Q5.S32
141qtempPred         QN Q5.S16
142
143dtempPred0        DN D10.S16
144dtempPred1        DN D11.S16
145
146
147
148      M_START   omxVCM4P2_PredictReconCoefIntra,r11,d11
149
150      ;// Assigning pointers to Input arguments on Stack
151
152      M_ARG           predQPonStack,4
153      M_ARG           predDironStack,4
154      M_ARG           ACPredFlagonStack,4
155      M_ARG           videoComponStack,4
156
157      ;// DC Prediction
158
159      M_LDR           videoComp,videoComponStack                     ;// Load videoComp From Stack
160
161      M_LDR           predDir,predDironStack                         ;// Load Prediction direction
162      ;// DC Scaler calculation
163      LDR             index, =armVCM4P2_DCScaler
164      ADD             index,index,videoComp,LSL #5
165      LDRB            dcScaler,[index,QP]
166
167
168      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63)
169      CMP             predDir,#2                                     ;// Check if the Prediction direction is vertical
170
171      ;// Caulucate tempPred
172
173      LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vetical load the coeff from Row Prediction Buffer
174      LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
175
176      RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler
177      MOV             temp1,absCoeffDC                               ;// Load the Prediction coeff to temp for comparision
178      CMP             temp1,#0
179      RSBLT           absCoeffDC,temp1,#0                            ;// calculate absolute val of prediction coeff
180
181      ADD             temp,dcScaler,dcScaler
182      LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
183      SMULBB          tempPred,temp,absCoeffDC                       ;// tempped=pPredBufRow(Col)[0]*32767/dcScaler
184      ADD             temp3,dcScaler,#1
185      LSR             tempPred,tempPred,#15                          ;// tempped=pPredBufRow(Col)[0]/dcScaler
186      LSR             temp3,temp3,#1                                 ;// temp3=round(dcScaler/2)
187      MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler
188
189      LDRH            dcRowbufCoeff,[pPredBufCol]
190
191      CMP             Rem,temp3                                      ;// compare Rem with (dcScaler/2)
192      ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=(dcScaler/2)
193      CMP             temp1,#0
194      RSBLT           tempPred,tempPred,#0                           ;// tempPred=-tempPred if
195
196      STRH            dcRowbufCoeff,[pPredBufRow,#-16]
197
198
199      LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
200      ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
201      SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]
202      SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// temp1=clipped(pSrcDst[0])*dcScaler
203      M_LDR           ACPredFlag,ACPredFlagonStack
204      STRH            dcColBuffCoeff,[pPredBufCol]
205
206
207       ;// AC Prediction
208
209      M_LDR           predQP,predQPonStack
210
211      CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
212      BNE             Exit                                           ;// If not set Exit
213      CMP             predDir,#2                                     ;// Check the Prediction direction
214      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
215      MOV             Const,#4
216      MUL             curQP,curQP,Const                              ;// curQP=4*curQP
217      VDUP            dPredQP0,predQP
218      LDR             temp2,[predCoeffTable,curQP]                   ;// temp=0x1ffff/curQP
219      VDUP            qCoeffTab,temp2
220      BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
221
222
223
224      ;// Vertical
225      ;//Calculating tempPred
226
227      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]:i=0 t0 7
228
229      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 t0 3
230      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 t0 3
231
232      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 t0 7
233
234      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 t0 3
235      VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits
236
237
238      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 t0 7
239      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP)  : i=4 t0 7
240      VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;//Loading pSrcDst[i] : i=0 to 7
241      VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
242      VMOV            dtempPred0,dPredQP1
243
244      ;//updating source and row prediction buffer contents
245      VADD            qtemp,qtemp,qtempPred                          ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
246      VQSHL           qtemp,qtemp,#4                                 ;//Clip to [-2048,2047]
247      LDRH            dcRowbufCoeff,[pPredBufRow]                    ;//Loading Dc Value of Row Prediction buffer
248      VSHR            qtemp,qtemp,#4
249
250      VST1            {dtemp0,dtemp1},[pSrcDst]                      ;//storing back the updated values
251      VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;//storing back the updated row prediction values
252      STRH            dcRowbufCoeff,[pPredBufRow]                    ;// storing the updated DC Row Prediction coeff
253
254      B               Exit
255
256Horizontal
257
258      ;// Calculating Temppred
259
260
261
262      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]:i=0 t0 7
263      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 t0 3
264      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 t0 3
265
266      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 t0 7
267
268      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 t0 3
269      VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits
270
271
272      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 t0 7
273
274      MOV             temppPredColBuf,pPredBufCol
275      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP)  : i=4 t0 7
276      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Loading coefficients Interleaving by 4
277      VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
278      VMOV            dtempPred0,dPredQP1
279
280      ;// Updating source and column prediction buffer contents
281      ADD             temp2,pSrcDst,#32
282      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading next 16 coefficients Interleaving by 4
283      VUZP            dtemp0,dtemp4                                  ;// Interleaving by 8
284      VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to coeffs
285      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
286      VSHR            dtemp0,dtemp0,#4
287      VST1            {dtemp0},[pPredBufCol]!                        ;// Updating Pridiction column buffer
288      VZIP            dtemp0,dtemp4                                  ;// deinterleaving
289      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs
290      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!
291
292      MOV             temp1,temp2
293      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Loading  coefficients Interleaving by 4
294
295      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
296      VUZP            dtemp0,dtemp4                                  ;// Interleaving by 8
297      VADD            dtemp0,dtemp0,dtempPred1
298      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
299      VSHR            dtemp0,dtemp0,#4
300      VST1            {dtemp0},[pPredBufCol]!
301      VZIP            dtemp0,dtemp4
302      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]
303      STRH            dcColBuffCoeff,[temppPredColBuf]
304      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
305
306Exit
307
308      STRH            temp,[pSrcDst]
309
310
311      MOV             Return,#OMX_Sts_NoErr
312
313      M_END
314      ENDIF
315
316
317       END
318
319
320
321