;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
; **********
; *
; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
; * OpenMAX DL: v1.0.2
; * Revision:   12290
; * Date:       Wednesday, April 9, 2008
; *
; *
; *
; *
; * Description:
; * Contains module for DC/AC coefficient prediction
; *
; *
; * Function: omxVCM4P2_PredictReconCoefIntra
; *
; * Description:
; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
; * to the function call, prediction direction (predDir) should be selected
; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
; *
; * Remarks:
; *
; * Parameters:
; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
; *                    quantized coefficient residuals (PQF) of the current
; *                    block; must be aligned on a 4-byte boundary. The
; *                    output coefficients are saturated to the range
; *                    [-2048, 2047].
; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
; *                    on a 4-byte boundary.
; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
; *                    aligned on a 4-byte boundary.
; * [in]  curQP        quantization parameter of the current block. curQP may
; *                    be equal to predQP, especially when the current block
; *                    and the predictor block are in the same macroblock.
; * [in]  predQP       quantization parameter of the predictor block
; * [in]  predDir      indicates the prediction direction which takes one
; *                    of the following values:
; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
; *                    OMX_VIDEO_VERTICAL      predict vertically
; * [in]  ACPredFlag   a flag indicating if AC prediction should be
; *                    performed. It is equal to ac_pred_flag in the bit
; *                    stream syntax of MPEG-4
; * [in]  videoComp    video component type (luminance, chrominance or
; *                    alpha) of the current block
; * [out] pSrcDst      pointer to the coefficient buffer which contains
; *                    the quantized coefficients (QF) of the current
; *                    block
; * [out] pPredBufRow  pointer to the updated coefficient row buffer
; * [out] pPredBufCol  pointer to the updated coefficient column buffer
; * Return Value:
; * OMX_Sts_NoErr - no error
; * OMX_Sts_BadArgErr - Bad arguments
; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
; *   predQP > 31, predDir exceeds [1,2].
; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
; *   4-byte aligned.
; *
; *********

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8



        IMPORT  armVCM4P2_Reciprocal_QP_S32
        IMPORT  armVCM4P2_Reciprocal_QP_S16
        IMPORT  armVCM4P2_DCScaler

        IF CortexA8

;//--------------------------------------------------------------------------
;// Register aliases.
;//
;// Several aliases deliberately share one physical register; each pair is
;// only valid while the other is dead. The instruction order in the routine
;// below relies on that, so do not reorder code without re-checking:
;//   r4  : dcScaler (DC stage)            -> predQP (AC stage)
;//   r6  : index -> temp1 (sign of DC predictor) -> ACPredFlag -> temp1
;//   r7  : videoComp                      -> predCoeffTable
;//   r8  : absCoeffDC (DC stage)          -> temppPredColBuf (Horizontal)
;//   r9  : tempPred (DC stage)            -> temp2 (AC stage)
;//   r10 : negdcScaler                    -> dcRowbufCoeff
;//   r11 : Rem                            -> dcColBuffCoeff (live until the
;//                                           end of the Horizontal path)
;//   D2-D5 : qCoeffTab / dPredQP0..1      -> dtemp4..dtemp7 (Horizontal)
;//--------------------------------------------------------------------------

;// Input Arguments

pSrcDst          RN 0
pPredBufRow      RN 1
pPredBufCol      RN 2
curQP            RN 3
QP               RN 3
predQP           RN 4
predDir          RN 5
ACPredFlag       RN 6
videoComp        RN 7

;// Local Variables

shortVideoHeader RN 4
dcScaler         RN 4
index            RN 6
predCoeffTable   RN 7
temp1            RN 6
temp2            RN 9
temp             RN 14
temppPredColBuf  RN 8
tempPred         RN 9

absCoeffDC       RN 8
negdcScaler      RN 10
Rem              RN 11
temp3            RN 12

dcRowbufCoeff    RN 10
dcColBuffCoeff   RN 11
Return           RN 0

;// NEON Registers

qPredRowBuf      QN Q0.S16
dPredRowBuf0     DN D0.S16
dPredRowBuf1     DN D1.S16




qCoeffTab        QN Q1.S32

qPredQP          QN Q2.S16
dPredQP0         DN D4.S16
dPredQP1         DN D5.S16


qtemp1           QN Q3.S32
qtemp            QN Q3.S16

dtemp0           DN D6.S16
dtemp1           DN D7.S16

dtemp2           DN D8.S16
dtemp3           DN D9.S16

dtemp4           DN D2.S16
dtemp5           DN D3.S16
dtemp6           DN D4.S16
dtemp7           DN D5.S16

qtempPred1       QN Q5.S32
qtempPred        QN Q5.S16

dtempPred0       DN D10.S16
dtempPred1       DN D11.S16


;//--------------------------------------------------------------------------
;// omxVCM4P2_PredictReconCoefIntra
;//
;// In:   r0 = pSrcDst, r1 = pPredBufRow, r2 = pPredBufCol, r3 = curQP,
;//       stack = predQP, predDir, ACPredFlag, videoComp (fetched via M_ARG /
;//       M_LDR).
;// Out:  r0 = OMX_Sts_NoErr. This routine performs no argument validation,
;//       so it never returns OMX_Sts_BadArgErr itself.
;//
;// Flow:
;//   1. DC: dcScaler is the byte at armVCM4P2_DCScaler[videoComp*32 + QP].
;//      The predictor is pPredBufRow[0] when predDir == 2, otherwise
;//      pPredBufCol[0]. tempPred = sign(pred) * (|pred| / dcScaler), with
;//      the quotient bumped by one when the remainder >= (dcScaler+1)>>1.
;//      pSrcDst[0] + tempPred is saturated to 12 bits (written at Exit),
;//      pPredBufRow[-8] receives the old pPredBufCol[0], and pPredBufCol[0]
;//      receives the reconstructed DC times dcScaler.
;//   2. AC (only when ACPredFlag == 1): eight predictors from the row
;//      (predDir == 2) or column buffer are scaled by predQP, multiplied by
;//      the reciprocal-table entry for curQP and rounded with a >>17, then
;//      added to the first row / first column of pSrcDst, clipped to 12
;//      bits and written back to both pSrcDst and the prediction buffer.
;//
;// M_START / M_ARG / M_LDR / M_END are macros from armCOMM_s.h (not visible
;// here); presumably "r11,d11" requests preservation of r4-r11 and d8-d11.
;// NOTE(review): confirm against armCOMM_s.h, since r14 is used as scratch.
;//--------------------------------------------------------------------------

        M_START omxVCM4P2_PredictReconCoefIntra,r11,d11

        ;// Assigning pointers to Input arguments on Stack

        M_ARG           predQPonStack,4
        M_ARG           predDironStack,4
        M_ARG           ACPredFlagonStack,4
        M_ARG           videoComponStack,4

        ;// DC Prediction

        M_LDR           videoComp,videoComponStack                     ;// Load videoComp from stack

        M_LDR           predDir,predDironStack                         ;// Load prediction direction
        ;// DC Scaler calculation
        LDR             index, =armVCM4P2_DCScaler
        ADD             index,index,videoComp,LSL #5                   ;// 32 table bytes per video component
        LDRB            dcScaler,[index,QP]                            ;// dcScaler = table[videoComp*32 + QP]


        LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Reciprocal table (per original note: entries 32767/(1 to 63))
        CMP             predDir,#2                                     ;// Check if the prediction direction is vertical

        ;// Calculate tempPred

        LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical, load the coeff from the row prediction buffer
        LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal, load the coeff from the column prediction buffer

        RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler = -dcScaler
        MOV             temp1,absCoeffDC                               ;// Keep the signed predictor; its sign is re-applied below
        CMP             temp1,#0
        RSBLT           absCoeffDC,temp1,#0                            ;// absCoeffDC = |predictor|

        ADD             temp,dcScaler,dcScaler                         ;// Byte offset of the 16-bit table entry for dcScaler
        LDRH            temp,[predCoeffTable,temp]                     ;// Reciprocal used to divide by multiplication
        SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred = |pPredBufRow(Col)[0]| * reciprocal(dcScaler)
        ADD             temp3,dcScaler,#1
        LSR             tempPred,tempPred,#15                          ;// tempPred = |pPredBufRow(Col)[0]| / dcScaler (truncated)
        LSR             temp3,temp3,#1                                 ;// temp3 = (dcScaler+1)/2, the rounding threshold
        MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Rem = |pPredBufRow(Col)[0]| - tempPred*dcScaler

        LDRH            dcRowbufCoeff,[pPredBufCol]                    ;// Old pPredBufCol[0], read before it is overwritten below

        CMP             Rem,temp3                                      ;// Compare Rem with the rounding threshold
        ADDGE           tempPred,#1                                    ;// tempPred = tempPred+1 if Rem >= threshold
        CMP             temp1,#0
        RSBLT           tempPred,tempPred,#0                           ;// tempPred = -tempPred if the predictor was negative

        STRH            dcRowbufCoeff,[pPredBufRow,#-16]               ;// pPredBufRow[-8] = old pPredBufCol[0]


        LDRH            temp,[pSrcDst]                                 ;// temp = pSrcDst[0]
        ADD             temp,temp,tempPred                             ;// temp = pSrcDst[0]+tempPred
        SSAT16          temp,#12,temp                                  ;// Clip to [-2048,2047]; only the bottom halfword is used afterwards
        SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff = clipped(pSrcDst[0])*dcScaler
        M_LDR           ACPredFlag,ACPredFlagonStack
        STRH            dcColBuffCoeff,[pPredBufCol]                   ;// pPredBufCol[0] = rescaled reconstructed DC


        ;// AC Prediction

        M_LDR           predQP,predQPonStack

        CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
        BNE             Exit                                           ;// If not set, Exit
        CMP             predDir,#2                                     ;// Check the prediction direction (flags consumed by BNE below)
        LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Reciprocal table (per original note: entries 0x1ffff/(1 to 63))
        VDUP            dPredQP0,predQP                                ;// predQP in every 16-bit lane of dPredQP0
        LDR             temp2,[predCoeffTable,curQP,LSL #2]            ;// temp2 = 32-bit table entry for curQP (word index scaled in the address)
        VDUP            qCoeffTab,temp2
        BNE             Horizontal                                     ;// If the prediction direction is not vertical, branch to Horizontal



        ;// Vertical
        ;// Calculating tempPred

        VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]: i=0 to 7

        VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*predQP: i=0 to 3
        VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*predQP*reciprocal(curQP): i=0 to 3

        VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*predQP: i=4 to 7

        VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufRow[i]*predQP/curQP): i=0 to 3
        VSHRN           dPredQP1,qtempPred1,#0                         ;// Narrow to 16 bits; parked in dPredQP1 while Q5 is reused


        VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*predQP*reciprocal(curQP): i=4 to 7
        VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufRow[i]*predQP/curQP): i=4 to 7
        VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;// Loading pSrcDst[i]: i=0 to 7
        VSHRN           dtempPred1,qtempPred1,#0                       ;// Narrow qtempPred1[i] to 16 bits (upper half of qtempPred)
        VMOV            dtempPred0,dPredQP1                            ;// Lower half of qtempPred = predictions for i=0 to 3

        ;// Updating source and row prediction buffer contents
        VADD            qtemp,qtemp,qtempPred                          ;// pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
        VQSHL           qtemp,qtemp,#4                                 ;// Saturating shift up, then arithmetic shift back: clip to [-2048,2047]
        LDRH            dcRowbufCoeff,[pPredBufRow]                    ;// Save the DC slot of the row buffer before the vector store
        VSHR            qtemp,qtemp,#4

        VST1            {dtemp0,dtemp1},[pSrcDst]                      ;// Storing back the updated values (element 0 is rewritten at Exit)
        VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;// Storing back the updated row prediction values
        STRH            dcRowbufCoeff,[pPredBufRow]                    ;// Put back the DC slot that the vector store just overwrote

        B               Exit

Horizontal

        ;// Calculating tempPred



        VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]: i=0 to 7
        VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*predQP: i=0 to 3
        VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*predQP*reciprocal(curQP): i=0 to 3

        VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*predQP: i=4 to 7

        VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufCol[i]*predQP/curQP): i=0 to 3
        VSHRN           dPredQP1,qtempPred1,#0                         ;// Narrow to 16 bits; parked in dPredQP1 while Q5 is reused


        VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*predQP*reciprocal(curQP): i=4 to 7

        MOV             temppPredColBuf,pPredBufCol                    ;// Keep the buffer base; pPredBufCol is post-incremented below
        VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufCol[i]*predQP/curQP): i=4 to 7
        VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Load 16 coefficients, de-interleaved by 4
        VSHRN           dtempPred1,qtempPred1,#0                       ;// Narrow qtempPred1[i] to 16 bits
        VMOV            dtempPred0,dPredQP1                            ;// Must precede the next VLD4: dtemp7 aliases dPredQP1 (D5)

        ;// Updating source and column prediction buffer contents
        ADD             temp2,pSrcDst,#32
        VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Load the next 16 coefficients, de-interleaved by 4
        VUZP            dtemp0,dtemp4                                  ;// dtemp0 = pSrcDst[0],[8],[16],[24] (every 8th coefficient)
        VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to coeffs
        VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
        VSHR            dtemp0,dtemp0,#4
        VST1            {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer, entries 0 to 3
        VZIP            dtemp0,dtemp4                                  ;// Undo the VUZP so the VST4 pair restores memory order
        VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs
        VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!         ;// temp2 now = pSrcDst+64

        MOV             temp1,temp2                                    ;// temp1 = pSrcDst+64, store address for the third group
        VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Load 16 coefficients, de-interleaved by 4; temp2 = pSrcDst+96

        VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
        VUZP            dtemp0,dtemp4                                  ;// dtemp0 = pSrcDst[32],[40],[48],[56]
        VADD            dtemp0,dtemp0,dtempPred1
        VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
        VSHR            dtemp0,dtemp0,#4
        VST1            {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer, entries 4 to 7
        VZIP            dtemp0,dtemp4                                  ;// Undo the VUZP before storing back
        VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]
        STRH            dcColBuffCoeff,[temppPredColBuf]               ;// Restore pPredBufCol[0], overwritten by the first VST1 above
        VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]

Exit

        STRH            temp,[pSrcDst]                                 ;// pSrcDst[0] = reconstructed, clipped DC (replaces any AC-path write)


        MOV             Return,#OMX_Sts_NoErr

        M_END
        ENDIF


        END


