10c1bc742181ded4930842b46e9507372f0b1b963James Dong; **********
20c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
30c1bc742181ded4930842b46e9507372f0b1b963James Dong; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
40c1bc742181ded4930842b46e9507372f0b1b963James Dong; * OpenMAX DL: v1.0.2
50c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Revision:   12290
60c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Date:       Wednesday, April 9, 2008
70c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
80c1bc742181ded4930842b46e9507372f0b1b963James Dong; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
100c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
110c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
120c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Description:
130c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Contains module for DC/AC coefficient prediction
140c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
150c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
160c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Function: omxVCM4P2_PredictReconCoefIntra
170c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
180c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Description:
190c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
200c1bc742181ded4930842b46e9507372f0b1b963James Dong; * to the function call, prediction direction (predDir) should be selected
210c1bc742181ded4930842b46e9507372f0b1b963James Dong; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
220c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
230c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Remarks:
240c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
250c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Parameters:
260c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
270c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    quantized coefficient residuals (PQF) of the current
280c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    block; must be aligned on a 4-byte boundary. The
290c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    output coefficients are saturated to the range
300c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    [-2048, 2047].
310c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
320c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    on a 4-byte boundary.
330c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
340c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    aligned on a 4-byte boundary.
350c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  curQP        quantization parameter of the current block. curQP may
360c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    be equal to predQP, especially when the current block and
370c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    the predictor block are in the same macroblock.
380c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  predQP       quantization parameter of the predictor block
390c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  predDir      indicates the prediction direction which takes one
400c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    of the following values:
410c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
420c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    OMX_VIDEO_VERTICAL        predict vertically
430c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  ACPredFlag   a flag indicating if AC prediction should be
440c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    performed. It is equal to ac_pred_flag in the bit
450c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    stream syntax of MPEG-4
460c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [in]  videoComp    video component type (luminance, chrominance or
470c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    alpha) of the current block
480c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [out] pSrcDst      pointer to the coefficient buffer which contains
490c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    the quantized coefficients (QF) of the current
500c1bc742181ded4930842b46e9507372f0b1b963James Dong; *                    block
510c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [out] pPredBufRow  pointer to the updated coefficient row buffer
520c1bc742181ded4930842b46e9507372f0b1b963James Dong; * [out] pPredBufCol  pointer to the updated coefficient column buffer
530c1bc742181ded4930842b46e9507372f0b1b963James Dong; * Return Value:
540c1bc742181ded4930842b46e9507372f0b1b963James Dong; * OMX_Sts_NoErr - no error
550c1bc742181ded4930842b46e9507372f0b1b963James Dong; * OMX_Sts_BadArgErr - Bad arguments
560c1bc742181ded4930842b46e9507372f0b1b963James Dong; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
570c1bc742181ded4930842b46e9507372f0b1b963James Dong; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
580c1bc742181ded4930842b46e9507372f0b1b963James Dong; *   predQP > 31, predDir is outside [1,2].
590c1bc742181ded4930842b46e9507372f0b1b963James Dong; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
600c1bc742181ded4930842b46e9507372f0b1b963James Dong; *   4-byte aligned.
610c1bc742181ded4930842b46e9507372f0b1b963James Dong; *
620c1bc742181ded4930842b46e9507372f0b1b963James Dong; *********
630c1bc742181ded4930842b46e9507372f0b1b963James Dong
640c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
650c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
660c1bc742181ded4930842b46e9507372f0b1b963James Dong
670c1bc742181ded4930842b46e9507372f0b1b963James Dong       M_VARIANTS CortexA8
680c1bc742181ded4930842b46e9507372f0b1b963James Dong
690c1bc742181ded4930842b46e9507372f0b1b963James Dong
700c1bc742181ded4930842b46e9507372f0b1b963James Dong
710c1bc742181ded4930842b46e9507372f0b1b963James Dong       IMPORT        armVCM4P2_Reciprocal_QP_S32
720c1bc742181ded4930842b46e9507372f0b1b963James Dong       IMPORT        armVCM4P2_Reciprocal_QP_S16
730c1bc742181ded4930842b46e9507372f0b1b963James Dong       IMPORT        armVCM4P2_DCScaler
740c1bc742181ded4930842b46e9507372f0b1b963James Dong
750c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF CortexA8
760c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Input Arguments
;// pSrcDst, pPredBufRow, pPredBufCol and curQP use r0-r3. predQP, predDir,
;// ACPredFlag and videoComp are fetched from the stack with M_LDR inside the
;// function, into the registers named here. QP is a second name for curQP (r3).
770c1bc742181ded4930842b46e9507372f0b1b963James Dong
780c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcDst          RN 0
790c1bc742181ded4930842b46e9507372f0b1b963James DongpPredBufRow      RN 1
800c1bc742181ded4930842b46e9507372f0b1b963James DongpPredBufCol      RN 2
810c1bc742181ded4930842b46e9507372f0b1b963James DongcurQP            RN 3
820c1bc742181ded4930842b46e9507372f0b1b963James DongQP               RN 3
830c1bc742181ded4930842b46e9507372f0b1b963James DongpredQP           RN 4
840c1bc742181ded4930842b46e9507372f0b1b963James DongpredDir          RN 5
850c1bc742181ded4930842b46e9507372f0b1b963James DongACPredFlag       RN 6
860c1bc742181ded4930842b46e9507372f0b1b963James DongvideoComp        RN 7
870c1bc742181ded4930842b46e9507372f0b1b963James Dong
880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Local Variables
;// Several names share one register, so a value is only valid until another
;// name on the same register is written:
;//   r0  pSrcDst / Return
;//   r4  predQP / shortVideoHeader / dcScaler
;//   r6  ACPredFlag / index / temp1
;//   r7  videoComp / predCoeffTable
;//   r8  Const / temppPredColBuf / absCoeffDC
;//   r9  temp2 / tempPred
;//   r10 negdcScaler / dcRowbufCoeff
;//   r11 Rem / dcColBuffCoeff
;//   r14 temp (this is the link register)
;// shortVideoHeader is declared but not referenced by the function below.
890c1bc742181ded4930842b46e9507372f0b1b963James Dong
900c1bc742181ded4930842b46e9507372f0b1b963James DongshortVideoHeader RN 4
910c1bc742181ded4930842b46e9507372f0b1b963James DongdcScaler         RN 4
920c1bc742181ded4930842b46e9507372f0b1b963James Dongindex            RN 6
930c1bc742181ded4930842b46e9507372f0b1b963James DongpredCoeffTable   RN 7
940c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp1            RN 6
950c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp2            RN 9
960c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp             RN 14
970c1bc742181ded4930842b46e9507372f0b1b963James DongConst            RN 8
980c1bc742181ded4930842b46e9507372f0b1b963James DongtemppPredColBuf  RN 8
990c1bc742181ded4930842b46e9507372f0b1b963James DongtempPred         RN 9
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong
1010c1bc742181ded4930842b46e9507372f0b1b963James DongabsCoeffDC       RN 8
1020c1bc742181ded4930842b46e9507372f0b1b963James DongnegdcScaler      RN 10
1030c1bc742181ded4930842b46e9507372f0b1b963James DongRem              RN 11
1040c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp3            RN 12
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong
1060c1bc742181ded4930842b46e9507372f0b1b963James DongdcRowbufCoeff    RN 10
1070c1bc742181ded4930842b46e9507372f0b1b963James DongdcColBuffCoeff   RN 11
1080c1bc742181ded4930842b46e9507372f0b1b963James DongReturn           RN 0
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//NEON Registers
;// The .S16 / .S32 suffix selects the element type used for each name.
;// Q and D names overlap (Qn is the pair D2n, D2n+1), and some D registers
;// carry two names:
;//   Q0  = dPredRowBuf0 / dPredRowBuf1            (D0 / D1)
;//   Q1  = qCoeffTab, also dtemp4 / dtemp5        (D2 / D3)
;//   Q2  = dPredQP0 / dPredQP1, also dtemp6 / dtemp7 (D4 / D5)
;//   Q3  = qtemp1 (S32 view), qtemp (S16 view) = dtemp0 / dtemp1 (D6 / D7)
;//   D8 / D9 = dtemp2 / dtemp3
;//   Q5  = qtempPred1 (S32 view), qtempPred (S16 view) = dtempPred0 / dtempPred1 (D10 / D11)
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong
1120c1bc742181ded4930842b46e9507372f0b1b963James DongqPredRowBuf       QN Q0.S16
1130c1bc742181ded4930842b46e9507372f0b1b963James DongdPredRowBuf0      DN D0.S16
1140c1bc742181ded4930842b46e9507372f0b1b963James DongdPredRowBuf1      DN D1.S16
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong
1190c1bc742181ded4930842b46e9507372f0b1b963James DongqCoeffTab         QN Q1.S32
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong
1210c1bc742181ded4930842b46e9507372f0b1b963James DongqPredQP           QN Q2.S16
1220c1bc742181ded4930842b46e9507372f0b1b963James DongdPredQP0          DN D4.S16
1230c1bc742181ded4930842b46e9507372f0b1b963James DongdPredQP1          DN D5.S16
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong
1260c1bc742181ded4930842b46e9507372f0b1b963James Dongqtemp1            QN Q3.S32
1270c1bc742181ded4930842b46e9507372f0b1b963James Dongqtemp             QN Q3.S16
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong
1290c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp0            DN D6.S16
1300c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp1            DN D7.S16
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong
1320c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp2            DN D8.S16
1330c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp3            DN D9.S16
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong
1350c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp4            DN D2.S16
1360c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp5            DN D3.S16
1370c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp6            DN D4.S16
1380c1bc742181ded4930842b46e9507372f0b1b963James Dongdtemp7            DN D5.S16
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong
1400c1bc742181ded4930842b46e9507372f0b1b963James DongqtempPred1        QN Q5.S32
1410c1bc742181ded4930842b46e9507372f0b1b963James DongqtempPred         QN Q5.S16
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong
1430c1bc742181ded4930842b46e9507372f0b1b963James DongdtempPred0        DN D10.S16
1440c1bc742181ded4930842b46e9507372f0b1b963James DongdtempPred1        DN D11.S16
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// -----------------------------------------------------------------------------
;// omxVCM4P2_PredictReconCoefIntra (assembled inside the IF CortexA8 section)
;//
;// Inputs in registers : pSrcDst (r0), pPredBufRow (r1), pPredBufCol (r2), curQP (r3)
;// Inputs on the stack : predQP, predDir, ACPredFlag, videoComp (M_ARG order below)
;// Result              : r0 = OMX_Sts_NoErr. The code below contains no argument
;//                       checks, so this is the only value it returns.
;// NOTE(review): "r11,d11" presumably asks M_START/M_END to preserve r4-r11 and
;// d8-d11; the macros live in armCOMM_s.h - confirm there.
;//
;// Outline
;//   1. DC: dcScaler is read from armVCM4P2_DCScaler. The DC predictor
;//      (pPredBufRow[0] when predDir == 2, otherwise pPredBufCol[0]) is divided
;//      by dcScaler with rounding, added to pSrcDst[0] and saturated to 12 bits.
;//      pPredBufCol[0] receives the saturated DC times dcScaler; the previous
;//      pPredBufCol[0] is copied to the halfword 16 bytes below pPredBufRow.
;//   2. AC (only when ACPredFlag == 1): eight predictors are multiplied by predQP
;//      and by the reciprocal-table entry for curQP, rounded, added to the first
;//      eight coefficients (predDir == 2) or to pSrcDst[0],[8],...,[56]
;//      (otherwise), clipped, and written to pSrcDst and the matching buffer.
;//   3. Exit: the saturated DC value kept in temp (r14) is stored to pSrcDst[0].
;// -----------------------------------------------------------------------------
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_START   omxVCM4P2_PredictReconCoefIntra,r11,d11
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// Naming the stack-passed input arguments (4 bytes each)
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_ARG           predQPonStack,4
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_ARG           predDironStack,4
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_ARG           ACPredFlagonStack,4
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_ARG           videoComponStack,4
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// DC Prediction
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_LDR           videoComp,videoComponStack                     ;// Load videoComp from stack
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_LDR           predDir,predDironStack                         ;// Load prediction direction
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// DC scaler calculation: dcScaler = byte at armVCM4P2_DCScaler + videoComp*32 + QP
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDR             index, =armVCM4P2_DCScaler
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong      ADD             index,index,videoComp,LSL #5
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDRB            dcScaler,[index,QP]
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// predCoeffTable shares r7 with videoComp, which is not read again after this point.
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63)
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong      CMP             predDir,#2                                     ;// Check if the prediction direction is vertical (flags used by the two conditional loads below)
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// Calculate tempPred
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical load the coeff from Row Prediction Buffer
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong      RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong      MOV             temp1,absCoeffDC                               ;// Keep the signed prediction coeff in temp1 for the sign tests
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong      CMP             temp1,#0
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong      RSBLT           absCoeffDC,temp1,#0                            ;// calculate absolute val of prediction coeff
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// temp = 2*dcScaler, the byte offset of halfword entry number dcScaler in the table
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong      ADD             temp,dcScaler,dcScaler
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong      SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong      ADD             temp3,dcScaler,#1
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong      LSR             tempPred,tempPred,#15                          ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong      LSR             temp3,temp3,#1                                 ;// temp3=(dcScaler+1)>>1, i.e. dcScaler/2 rounded up
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong      MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// Read the current pPredBufCol[0] before it is overwritten further down.
      ;// Writing dcRowbufCoeff (r10) ends the life of negdcScaler, already consumed by the MLA.
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDRH            dcRowbufCoeff,[pPredBufCol]
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong      CMP             Rem,temp3                                      ;// compare Rem with (dcScaler+1)>>1
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong      ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=(dcScaler+1)>>1
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong      CMP             temp1,#0
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong      RSBLT           tempPred,tempPred,#0                           ;// tempPred=-tempPred if the prediction coeff (temp1) was negative
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// Copy the old pPredBufCol[0] to the halfword 16 bytes (8 halfwords) below pPredBufRow
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong      STRH            dcRowbufCoeff,[pPredBufRow,#-16]
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong      ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong      SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]; temp (r14) is kept until the STRH at Exit
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong      SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff=clipped(pSrcDst[0]+tempPred)*dcScaler
      ;// ACPredFlag shares r6 with temp1, whose last read was the CMP above
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_LDR           ACPredFlag,ACPredFlagonStack
      ;// pPredBufCol[0] = reconstructed DC scaled by dcScaler
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong      STRH            dcColBuffCoeff,[pPredBufCol]
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong       ;// AC Prediction
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// predQP shares r4 with dcScaler, which is not read again after the SMULBB above
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_LDR           predQP,predQPonStack
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong      CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong      BNE             Exit                                           ;// If not set Exit
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong      CMP             predDir,#2                                     ;// Check the prediction direction; no instruction before "BNE Horizontal" sets flags, so the result survives until then
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong      MOV             Const,#4
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong      MUL             curQP,curQP,Const                              ;// curQP=4*curQP (byte offset of the 32-bit table entry)
      ;// Every S16 lane of dPredQP0 = predQP
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong      VDUP            dPredQP0,predQP
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDR             temp2,[predCoeffTable,curQP]                   ;// temp2=table entry for curQP (0x1ffff/curQP)
      ;// Every S32 lane of qCoeffTab = temp2
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong      VDUP            qCoeffTab,temp2
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong      BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// Vertical
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;//Calculating tempPred
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]:i=0 to 7
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 to 3
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 to 7
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 to 3
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits; parked in dPredQP1 because qtempPred1 is reused for i=4 to 7
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP)  : i=4 to 7
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;//Loading pSrcDst[i] : i=0 to 7
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
      ;// qtempPred now holds the eight 16-bit predictors: lanes 0-3 from dPredQP1, lanes 4-7 from the VSHRN above
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMOV            dtempPred0,dPredQP1
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;//updating source and row prediction buffer contents
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong      VADD            qtemp,qtemp,qtempPred                          ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong      VQSHL           qtemp,qtemp,#4                                 ;//Clip to [-2048,2047]: saturating shift left by 4 ...
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong      LDRH            dcRowbufCoeff,[pPredBufRow]                    ;//Loading DC value of row prediction buffer so it can be put back after the vector store
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHR            qtemp,qtemp,#4                                 ;// ... then shift right by 4 to undo the scaling
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST1            {dtemp0,dtemp1},[pSrcDst]                      ;//storing back the updated values (pSrcDst[0] is rewritten at Exit)
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;//storing back the updated row prediction values
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong      STRH            dcRowbufCoeff,[pPredBufRow]                    ;// restoring pPredBufRow[0] to the value loaded before the vector store
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong      B               Exit
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong
2560c1bc742181ded4930842b46e9507372f0b1b963James DongHorizontal
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// Calculating tempPred
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]:i=0 to 7
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 to 3
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 to 7
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 to 3
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits; parked in dPredQP1 because qtempPred1 is reused for i=4 to 7
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// Keep the original pPredBufCol: the VST1 stores below post-increment it
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong      MOV             temppPredColBuf,pPredBufCol
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP)  : i=4 to 7
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Loading pSrcDst[0..15] de-interleaved by 4: dtemp0=elements 0,4,8,12
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
      ;// dtempPred0 = predictors 0-3. This also frees D4/D5, which the next VLD4 reuses as dtemp6/dtemp7
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong      VMOV            dtempPred0,dPredQP1
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong      ;// Updating source and column prediction buffer contents
      ;// temp2 = address of pSrcDst[16] (32 bytes on)
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong      ADD             temp2,pSrcDst,#32
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading pSrcDst[16..31] de-interleaved by 4: dtemp4=elements 16,20,24,28 (overwrites qCoeffTab, no longer needed)
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong      VUZP            dtemp0,dtemp4                                  ;// dtemp0=elements 0,8,16,24 (stride 8); dtemp4=elements 4,12,20,28
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong      VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to coeffs
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]: saturating shift left by 4 ...
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHR            dtemp0,dtemp0,#4                               ;// ... then shift right by 4
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST1            {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer entries 0 to 3 (pPredBufCol advances)
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong      VZIP            dtemp0,dtemp4                                  ;// undo the VUZP so the registers match the VLD4 layout again
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!         ;// writeback leaves temp2 at the next 16 coefficients
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// Second half: same sequence for the next 32 coefficients, using predictors 4-7.
      ;// temp1 remembers the address of the first group of 16 for the store below.
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong      MOV             temp1,temp2
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Loading 16 coefficients de-interleaved by 4
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading the last 16 coefficients de-interleaved by 4
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong      VUZP            dtemp0,dtemp4                                  ;// dtemp0=every 8th coefficient of this half
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong      VADD            dtemp0,dtemp0,dtempPred1                       ;// Adding tempPred (i=4 to 7) to coeffs
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]: saturating shift left by 4 ...
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong      VSHR            dtemp0,dtemp0,#4                               ;// ... then shift right by 4
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST1            {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer entries 4 to 7
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong      VZIP            dtemp0,dtemp4                                  ;// undo the VUZP
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]          ;// Updating source coeffs
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong      STRH            dcColBuffCoeff,[temppPredColBuf]               ;// put the scaled DC back in pPredBufCol[0], overwritten by the first VST1
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong
3060c1bc742181ded4930842b46e9507372f0b1b963James DongExit
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// pSrcDst[0] = saturated DC computed in the DC section (temp still holds it)
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong      STRH            temp,[pSrcDst]
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong
      ;// Return (r0) replaces pSrcDst, which is not needed past this point
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong      MOV             Return,#OMX_Sts_NoErr
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong      M_END
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong      ENDIF
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong       END
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong
321