;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
; **********
; *
; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
; * OpenMAX DL: v1.0.2
; * Revision:   12290
; * Date:       Wednesday, April 9, 2008
; *
; *
; *
; *
; * Description:
; * Contains module for DC/AC coefficient prediction
; *
; *
; * Function: omxVCM4P2_PredictReconCoefIntra
; *
; * Description:
; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
; * to the function call, prediction direction (predDir) should be selected
; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
; *
; * Remarks:
; *
; * Parameters:
; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
; *                    quantized coefficient residuals (PQF) of the current
; *                    block; must be aligned on a 4-byte boundary. The
; *                    output coefficients are saturated to the range
; *                    [-2048, 2047].
; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
; *                    on a 4-byte boundary.
; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
; *                    aligned on a 4-byte boundary.
; * [in]  curQP        quantization parameter of the current block. curQP may
; *                    be equal to predQP, especially when the current block
; *                    and the predictor block are in the same macroblock.
; * [in]  predQP       quantization parameter of the predictor block
; * [in]  predDir      indicates the prediction direction which takes one
; *                    of the following values:
; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
; *                    OMX_VIDEO_VERTICAL      predict vertically
; * [in]  ACPredFlag   a flag indicating if AC prediction should be
; *                    performed. It is equal to ac_pred_flag in the bit
; *                    stream syntax of MPEG-4
; * [in]  videoComp    video component type (luminance, chrominance or
; *                    alpha) of the current block
; * [out] pSrcDst      pointer to the coefficient buffer which contains
; *                    the quantized coefficients (QF) of the current
; *                    block
; * [out] pPredBufRow  pointer to the updated coefficient row buffer
; * [out] pPredBufCol  pointer to the updated coefficient column buffer
; * Return Value:
; * OMX_Sts_NoErr - no error
; * OMX_Sts_BadArgErr - Bad arguments
; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
; *   predQP > 31, predDir is outside the range [1,2].
; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
; *   4-byte aligned.
; *
; *********

;// Shared OpenMAX DL definitions and the assembler helper macros
;// (M_VARIANTS, M_START, M_ARG, M_LDR, M_END are used below and are expected
;// to come from armCOMM_s.h, which is not part of this file).
        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

       M_VARIANTS CortexA8



;// Lookup tables defined in other modules:
;//   armVCM4P2_Reciprocal_QP_S32 - word entries, 0x1ffff/(1 to 63); indexed by 4*curQP
;//   armVCM4P2_Reciprocal_QP_S16 - halfword entries, 32767/(1 to 63); indexed by 2*dcScaler
;//   armVCM4P2_DCScaler          - byte entries; indexed by videoComp*32 + QP
       IMPORT        armVCM4P2_Reciprocal_QP_S32
       IMPORT        armVCM4P2_Reciprocal_QP_S16
       IMPORT        armVCM4P2_DCScaler

        IF CortexA8
;// Input Arguments
;// r0-r3 arrive in registers; predQP, predDir, ACPredFlag and videoComp are
;// stack arguments (see the M_ARG declarations in the function) and are
;// loaded into r4-r7 on demand.

pSrcDst          RN 0
pPredBufRow      RN 1
pPredBufCol      RN 2
curQP            RN 3
QP               RN 3       ;// same register as curQP
predQP           RN 4
predDir          RN 5
ACPredFlag       RN 6
videoComp        RN 7

;// Local Variables
;// Several names below share one physical register; each pair is only safe
;// because the two values are never live at the same time:
;//   r4 : dcScaler (DC stage)            / predQP (loaded after the DC stage)
;//   r6 : index, temp1                   / ACPredFlag (loaded after temp1 is dead)
;//   r7 : predCoeffTable                 / videoComp (consumed before the table load)
;//   r8 : absCoeffDC, Const, temppPredColBuf
;//   r9 : tempPred, temp2
;//   r10: negdcScaler, dcRowbufCoeff
;//   r11: Rem, dcColBuffCoeff

shortVideoHeader RN 4       ;// not referenced in this file
dcScaler         RN 4
index            RN 6
predCoeffTable   RN 7
temp1            RN 6
temp2            RN 9
temp             RN 14      ;// r14 (lr) used as scratch; holds the reconstructed DC until Exit
Const            RN 8
temppPredColBuf  RN 8
tempPred         RN 9

absCoeffDC       RN 8
negdcScaler      RN 10
Rem              RN 11
temp3            RN 12

dcRowbufCoeff    RN 10
dcColBuffCoeff   RN 11
Return           RN 0

;//NEON Registers
;// Aliasing to keep in mind:
;//   qCoeffTab (Q1) occupies D2:D3 and qPredQP (Q2) occupies D4:D5, so
;//   dtemp4..dtemp7 (D2..D5) overwrite both; they may only be loaded once the
;//   reciprocal and predQP vectors are no longer needed.
;//   qtemp/qtemp1 (Q3) is D6:D7 = dtemp0:dtemp1.
;//   qtempPred/qtempPred1 (Q5) is D10:D11 = dtempPred0:dtempPred1.

qPredRowBuf       QN Q0.S16
dPredRowBuf0      DN D0.S16
dPredRowBuf1      DN D1.S16




qCoeffTab         QN Q1.S32

qPredQP           QN Q2.S16
dPredQP0          DN D4.S16
dPredQP1          DN D5.S16


qtemp1            QN Q3.S32
qtemp             QN Q3.S16

dtemp0            DN D6.S16
dtemp1            DN D7.S16

dtemp2            DN D8.S16
dtemp3            DN D9.S16

dtemp4            DN D2.S16
dtemp5            DN D3.S16
dtemp6            DN D4.S16
dtemp7            DN D5.S16

qtempPred1        QN Q5.S32
qtempPred         QN Q5.S16

dtempPred0        DN D10.S16
dtempPred1        DN D11.S16



;//-----------------------------------------------------------------------------
;// omxVCM4P2_PredictReconCoefIntra
;//
;// Reconstructs the quantized coefficients of one intra block in place by
;// adding the DC predictor and, when ACPredFlag == 1, the AC predictors of the
;// first row (vertical) or first column (horizontal).
;//
;// In:    r0 = pSrcDst, r1 = pPredBufRow, r2 = pPredBufCol, r3 = curQP
;//        stack: predQP, predDir, ACPredFlag, videoComp (4 bytes each)
;// Out:   r0 = OMX_Sts_NoErr
;// Memory written:
;//        pSrcDst[0]               reconstructed, clipped DC
;//        pSrcDst row 0 / column 0 when AC prediction is enabled
;//        pPredBufRow[-8]          (halfword at byte offset -16) <- old pPredBufCol[0]
;//        pPredBufCol[0]           <- clipped DC * dcScaler
;//        pPredBufRow[1..7] (vertical) or pPredBufCol[1..7] (horizontal)
;//
;// NOTE(review): this body performs no argument validation and always returns
;// OMX_Sts_NoErr; the OMX_Sts_BadArgErr cases listed in the file header are
;// not checked here.
;// NOTE(review): "r11,d11" presumably tells M_START which callee-saved ARM and
;// NEON registers to preserve - confirm against armCOMM_s.h.
;//-----------------------------------------------------------------------------
      M_START   omxVCM4P2_PredictReconCoefIntra,r11,d11

      ;// Assigning pointers to Input arguments on Stack

      M_ARG           predQPonStack,4
      M_ARG           predDironStack,4
      M_ARG           ACPredFlagonStack,4
      M_ARG           videoComponStack,4

      ;// DC Prediction

      M_LDR           videoComp,videoComponStack                     ;// Load videoComp from stack

      M_LDR           predDir,predDironStack                         ;// Load prediction direction
      ;// DC scaler calculation: dcScaler = armVCM4P2_DCScaler[videoComp*32 + QP]
      LDR             index, =armVCM4P2_DCScaler
      ADD             index,index,videoComp,LSL #5
      LDRB            dcScaler,[index,QP]


      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Table with entries 32767/(1 to 63)
      CMP             predDir,#2                                     ;// Is the prediction direction vertical (2)?

      ;// Calculate tempPred = predictor DC / dcScaler, rounded to nearest with
      ;// halves rounded away from zero. The division is done on the absolute
      ;// value via a reciprocal multiply, then corrected using the remainder.

      LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical, load the coeff from the row prediction buffer
      LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal, load the coeff from the column prediction buffer

      RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler
      MOV             temp1,absCoeffDC                               ;// Keep the signed predictor for the sign tests below
      CMP             temp1,#0
      RSBLT           absCoeffDC,temp1,#0                            ;// absCoeffDC = abs(predictor)

      ADD             temp,dcScaler,dcScaler                         ;// byte offset of the halfword entry for dcScaler
      LDRH            temp,[predCoeffTable,temp]                     ;// Reciprocal used to replace the division by a multiplication
      SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
      ADD             temp3,dcScaler,#1
      LSR             tempPred,tempPred,#15                          ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler (truncated)
      LSR             temp3,temp3,#1                                 ;// temp3=(dcScaler+1)/2
      MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler

      LDRH            dcRowbufCoeff,[pPredBufCol]                    ;// Old column-buffer DC, copied into the row buffer below

      CMP             Rem,temp3                                      ;// Compare Rem with (dcScaler+1)/2
      ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=(dcScaler+1)/2
      CMP             temp1,#0
      RSBLT           tempPred,tempPred,#0                           ;// tempPred=-tempPred if the predictor was negative

      STRH            dcRowbufCoeff,[pPredBufRow,#-16]               ;// pPredBufRow[-8] = old pPredBufCol[0]


      LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0] (only the low halfword is used from here on)
      ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
      SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
      SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff=clipped(pSrcDst[0])*dcScaler
      M_LDR           ACPredFlag,ACPredFlagonStack                   ;// r6 is free now: temp1 is no longer needed
      STRH            dcColBuffCoeff,[pPredBufCol]                   ;// pPredBufCol[0] = new DC predictor


       ;// AC Prediction

      M_LDR           predQP,predQPonStack                           ;// r4 is free now: dcScaler is no longer needed

      CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set
      BNE             Exit                                           ;// If not set, only the DC is updated
      CMP             predDir,#2                                     ;// Check the prediction direction; the flags must survive
                                                                     ;// until the BNE below (nothing in between sets them)
      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Table with entries 0x1ffff/(1 to 63)
      MOV             Const,#4
      MUL             curQP,curQP,Const                              ;// curQP=4*curQP (byte offset of the word entry)
      VDUP            dPredQP0,predQP                                ;// predQP in every 16-bit lane
      LDR             temp2,[predCoeffTable,curQP]                   ;// temp2=0x1ffff/curQP
      VDUP            qCoeffTab,temp2                                ;// reciprocal in every 32-bit lane
      BNE             Horizontal                                     ;// If the prediction direction is horizontal, branch to Horizontal



      ;// Vertical
      ;// Calculating tempPred[i] = round(pPredBufRow[i]*predQP/curQP), i = 0 to 7

      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]: i=0 to 7

      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*predQP: i=0 to 3
      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*predQP*0x1ffff/curQP: i=0 to 3

      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*predQP: i=4 to 7

      VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufRow[i]*predQP/curQP): i=0 to 3
      VSHRN           dPredQP1,qtempPred1,#0                         ;// Narrow to 16 bits; parked in D5 while Q5 is reused


      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*predQP*0x1ffff/curQP: i=4 to 7
      VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufRow[i]*predQP/curQP): i=4 to 7
      VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;// Loading pSrcDst[i]: i=0 to 7
      VSHRN           dtempPred1,qtempPred1,#0                       ;// Narrow to 16 bits (upper half of qtempPred)
      VMOV            dtempPred0,dPredQP1                            ;// Lower half of qtempPred = predictions 0 to 3

      ;// Updating source and row prediction buffer contents.
      ;// Lane 0 (the DC) is also modified here; pSrcDst[0] is rewritten at Exit
      ;// and pPredBufRow[0] is preserved through dcRowbufCoeff.
      VADD            qtemp,qtemp,qtempPred                          ;// pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
      VQSHL           qtemp,qtemp,#4                                 ;// Clip to [-2048,2047]: saturating shift left ...
      LDRH            dcRowbufCoeff,[pPredBufRow]                    ;// Loading DC value of the row prediction buffer
      VSHR            qtemp,qtemp,#4                                 ;// ... then arithmetic shift back

      VST1            {dtemp0,dtemp1},[pSrcDst]                      ;// Storing back the updated values
      VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;// Storing back the updated row prediction values
      STRH            dcRowbufCoeff,[pPredBufRow]                    ;// Restoring the DC entry of the row prediction buffer

      B               Exit

Horizontal

      ;// Calculating tempPred[i] = round(pPredBufCol[i]*predQP/curQP), i = 0 to 7



      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]: i=0 to 7
      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*predQP: i=0 to 3
      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*predQP*0x1ffff/curQP: i=0 to 3

      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*predQP: i=4 to 7

      VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufCol[i]*predQP/curQP): i=0 to 3
      VSHRN           dPredQP1,qtempPred1,#0                         ;// Narrow to 16 bits; parked in D5 while Q5 is reused


      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*predQP*0x1ffff/curQP: i=4 to 7

      MOV             temppPredColBuf,pPredBufCol                    ;// Keep the buffer start; pPredBufCol is post-incremented below
      VRSHR           qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufCol[i]*predQP/curQP): i=4 to 7
      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Loading 16 coefficients (rows 0-1), de-interleaved by 4
      VSHRN           dtempPred1,qtempPred1,#0                       ;// Narrow to 16 bits: predictions 4 to 7
      VMOV            dtempPred0,dPredQP1                            ;// Predictions 0 to 3 (must be read before D5 is reloaded)

      ;// Updating source and column prediction buffer contents, rows 0 to 3
      ADD             temp2,pSrcDst,#32
      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading next 16 coefficients (rows 2-3), de-interleaved by 4
      VUZP            dtemp0,dtemp4                                  ;// dtemp0 = every 8th coefficient = column 0 of rows 0 to 3
      VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to the column coefficients
      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
      VSHR            dtemp0,dtemp0,#4
      VST1            {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer entries 0 to 3
      VZIP            dtemp0,dtemp4                                  ;// Undo the VUZP so the VST4 layout matches the loads
      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs
      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!         ;// temp2 advances to pSrcDst+64

      ;// Same again for rows 4 to 7
      MOV             temp1,temp2                                    ;// temp1 = pSrcDst+64, kept for the store below
      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Loading rows 4-5, de-interleaved by 4; temp2 = pSrcDst+96

      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading rows 6-7, de-interleaved by 4
      VUZP            dtemp0,dtemp4                                  ;// dtemp0 = column 0 of rows 4 to 7
      VADD            dtemp0,dtemp0,dtempPred1
      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
      VSHR            dtemp0,dtemp0,#4
      VST1            {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer entries 4 to 7
      VZIP            dtemp0,dtemp4
      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]
      STRH            dcColBuffCoeff,[temppPredColBuf]               ;// Restore column-buffer DC overwritten by the VST1 above
      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]

Exit

      STRH            temp,[pSrcDst]                                 ;// pSrcDst[0] = clipped reconstructed DC from the DC stage


      MOV             Return,#OMX_Sts_NoErr

      M_END
;// Closes the IF CortexA8 block opened before the register definitions.
      ENDIF


       END


