;//
;//
;// File Name:  omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Description:
;// H.264 inverse quantize and transform module
;//
;//



;// Include standard headers

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Import symbols required from other files
;// (For example tables)

        IMPORT armVCM4P10_UnpackBlock4x4
        IMPORT armVCM4P10_TransformResidual4x4
        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixU16
        IMPORT armVCM4P10_QPModuloTable

;// Processor variants this file provides an implementation for
        M_VARIANTS CortexA8

;// Set debugging level
;//DEBUG_ON    SETL {TRUE}


;// Static Function: armVCM4P10_DequantLumaAC4x4
;// (inlined into omxVCM4P10_DequantTransformResidualFromPairAndAdd below)

;// Guarding implementation by the processor name



;// Guarding implementation by the processor name



;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd

;// Guarding implementation by the processor name



;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd

;// Guarding implementation by the processor name

620c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF  CortexA8
630c1bc742181ded4930842b46e9507372f0b1b963James Dong
640c1bc742181ded4930842b46e9507372f0b1b963James Dong
650c1bc742181ded4930842b46e9507372f0b1b963James Dong;// ARM Registers
660c1bc742181ded4930842b46e9507372f0b1b963James Dong
670c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Input Registers
680c1bc742181ded4930842b46e9507372f0b1b963James DongppSrc       RN  0
690c1bc742181ded4930842b46e9507372f0b1b963James DongpPred       RN  1
700c1bc742181ded4930842b46e9507372f0b1b963James DongpDC         RN  2
710c1bc742181ded4930842b46e9507372f0b1b963James DongpDst        RN  3
720c1bc742181ded4930842b46e9507372f0b1b963James Dong
730c1bc742181ded4930842b46e9507372f0b1b963James Dong
740c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Output Registers
750c1bc742181ded4930842b46e9507372f0b1b963James Dongresult      RN  0
760c1bc742181ded4930842b46e9507372f0b1b963James Dong
770c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Local Scratch Registers
780c1bc742181ded4930842b46e9507372f0b1b963James Dong
790c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Registers used in armVCM4P10_DequantLumaAC4x4
800c1bc742181ded4930842b46e9507372f0b1b963James DongpQPdiv      RN  10
810c1bc742181ded4930842b46e9507372f0b1b963James DongpQPmod      RN  11
820c1bc742181ded4930842b46e9507372f0b1b963James DongpVRow       RN  2
830c1bc742181ded4930842b46e9507372f0b1b963James DongQPmod       RN  12
840c1bc742181ded4930842b46e9507372f0b1b963James Dongshift       RN  14
850c1bc742181ded4930842b46e9507372f0b1b963James Dongindex0      RN  1
860c1bc742181ded4930842b46e9507372f0b1b963James Dongindex1      RN  10
870c1bc742181ded4930842b46e9507372f0b1b963James Dong
880c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Registers used in DequantTransformResidualFromPairAndAdd
890c1bc742181ded4930842b46e9507372f0b1b963James DongpDelta      RN  4
900c1bc742181ded4930842b46e9507372f0b1b963James DongpDeltaTmp   RN  6
910c1bc742181ded4930842b46e9507372f0b1b963James DongAC          RN  5                   ;//Load from stack
920c1bc742181ded4930842b46e9507372f0b1b963James DongpPredTemp   RN  7
930c1bc742181ded4930842b46e9507372f0b1b963James DongpDCTemp     RN  8
940c1bc742181ded4930842b46e9507372f0b1b963James DongpDstTemp    RN  9
950c1bc742181ded4930842b46e9507372f0b1b963James DongpDeltaArg1  RN  1
960c1bc742181ded4930842b46e9507372f0b1b963James DongpDeltaArg0  RN  0
970c1bc742181ded4930842b46e9507372f0b1b963James DongQP          RN  1                   ;//Load from stack
980c1bc742181ded4930842b46e9507372f0b1b963James DongDCval       RN  10
990c1bc742181ded4930842b46e9507372f0b1b963James Dongpredstep    RN  1
1000c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep     RN  10
1010c1bc742181ded4930842b46e9507372f0b1b963James DongPredVal1    RN  3
1020c1bc742181ded4930842b46e9507372f0b1b963James DongPredVal2    RN  5
1030c1bc742181ded4930842b46e9507372f0b1b963James Dong
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Neon Registers
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used in armVCM4P10_DequantLumaAC4x4
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong
1110c1bc742181ded4930842b46e9507372f0b1b963James DongdVmatrix            DN  D6.8
1120c1bc742181ded4930842b46e9507372f0b1b963James DongdindexRow0          DN  D7.32
1130c1bc742181ded4930842b46e9507372f0b1b963James DongdindexRow1          DN  D9.32
1140c1bc742181ded4930842b46e9507372f0b1b963James DongdByteIndexRow0      DN  D7.8
1150c1bc742181ded4930842b46e9507372f0b1b963James DongdByteIndexRow1      DN  D9.8
1160c1bc742181ded4930842b46e9507372f0b1b963James DongdVRow0              DN  D8.8
1170c1bc742181ded4930842b46e9507372f0b1b963James DongdVRow1              DN  D4.8
1180c1bc742181ded4930842b46e9507372f0b1b963James DongdVRow0U16           DN  D8.U16
1190c1bc742181ded4930842b46e9507372f0b1b963James DongdVRow1U16           DN  D4.U16
1200c1bc742181ded4930842b46e9507372f0b1b963James DongdVRow2U16           DN  D8.U16
1210c1bc742181ded4930842b46e9507372f0b1b963James DongdVRow3U16           DN  D4.U16
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong
1230c1bc742181ded4930842b46e9507372f0b1b963James DongdShift              DN  D5.U16
1240c1bc742181ded4930842b46e9507372f0b1b963James DongdSrcRow0            DN  D0.I16
1250c1bc742181ded4930842b46e9507372f0b1b963James DongdSrcRow1            DN  D1.I16
1260c1bc742181ded4930842b46e9507372f0b1b963James DongdSrcRow2            DN  D2.I16
1270c1bc742181ded4930842b46e9507372f0b1b963James DongdSrcRow3            DN  D3.I16
1280c1bc742181ded4930842b46e9507372f0b1b963James DongdDqntRow0           DN  D0.I16
1290c1bc742181ded4930842b46e9507372f0b1b963James DongdDqntRow1           DN  D1.I16
1300c1bc742181ded4930842b46e9507372f0b1b963James DongdDqntRow2           DN  D2.I16
1310c1bc742181ded4930842b46e9507372f0b1b963James DongdDqntRow3           DN  D3.I16
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used in TransformResidual4x4
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Packed Input pixels
1360c1bc742181ded4930842b46e9507372f0b1b963James DongdIn0                DN  D0.S16
1370c1bc742181ded4930842b46e9507372f0b1b963James DongdIn1                DN  D1.S16
1380c1bc742181ded4930842b46e9507372f0b1b963James DongdIn2                DN  D2.S16
1390c1bc742181ded4930842b46e9507372f0b1b963James DongdIn3                DN  D3.S16
1400c1bc742181ded4930842b46e9507372f0b1b963James DongqIn01               QN  Q0.32
1410c1bc742181ded4930842b46e9507372f0b1b963James DongqIn23               QN  Q1.32
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Intermediate calculations
1440c1bc742181ded4930842b46e9507372f0b1b963James DongdZero               DN  D4.S16
1450c1bc742181ded4930842b46e9507372f0b1b963James Dongde0                 DN  D5.S16
1460c1bc742181ded4930842b46e9507372f0b1b963James Dongde1                 DN  D6.S16
1470c1bc742181ded4930842b46e9507372f0b1b963James Dongde2                 DN  D7.S16
1480c1bc742181ded4930842b46e9507372f0b1b963James Dongde3                 DN  D8.S16
1490c1bc742181ded4930842b46e9507372f0b1b963James DongdIn1RS              DN  D7.S16
1500c1bc742181ded4930842b46e9507372f0b1b963James DongdIn3RS              DN  D8.S16
1510c1bc742181ded4930842b46e9507372f0b1b963James Dongdf0                 DN  D0.S16
1520c1bc742181ded4930842b46e9507372f0b1b963James Dongdf1                 DN  D1.S16
1530c1bc742181ded4930842b46e9507372f0b1b963James Dongdf2                 DN  D2.S16
1540c1bc742181ded4930842b46e9507372f0b1b963James Dongdf3                 DN  D3.S16
1550c1bc742181ded4930842b46e9507372f0b1b963James Dongqf01                QN  Q0.32
1560c1bc742181ded4930842b46e9507372f0b1b963James Dongqf23                QN  Q1.32
1570c1bc742181ded4930842b46e9507372f0b1b963James Dongdg0                 DN  D5.S16
1580c1bc742181ded4930842b46e9507372f0b1b963James Dongdg1                 DN  D6.S16
1590c1bc742181ded4930842b46e9507372f0b1b963James Dongdg2                 DN  D7.S16
1600c1bc742181ded4930842b46e9507372f0b1b963James Dongdg3                 DN  D8.S16
1610c1bc742181ded4930842b46e9507372f0b1b963James Dongdf1RS               DN  D7.S16
1620c1bc742181ded4930842b46e9507372f0b1b963James Dongdf3RS               DN  D8.S16
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Output pixels
1650c1bc742181ded4930842b46e9507372f0b1b963James Dongdh0                 DN  D0.S16
1660c1bc742181ded4930842b46e9507372f0b1b963James Dongdh1                 DN  D1.S16
1670c1bc742181ded4930842b46e9507372f0b1b963James Dongdh2                 DN  D2.S16
1680c1bc742181ded4930842b46e9507372f0b1b963James Dongdh3                 DN  D3.S16
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used in DequantTransformResidualFromPairAndAdd
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong
1720c1bc742181ded4930842b46e9507372f0b1b963James DongdDeltaRow0          DN  D0.S16
1730c1bc742181ded4930842b46e9507372f0b1b963James DongdDeltaRow1          DN  D1.S16
1740c1bc742181ded4930842b46e9507372f0b1b963James DongdDeltaRow2          DN  D2.S16
1750c1bc742181ded4930842b46e9507372f0b1b963James DongdDeltaRow3          DN  D3.S16
1760c1bc742181ded4930842b46e9507372f0b1b963James DongqDeltaRow01         QN  Q0.S16
1770c1bc742181ded4930842b46e9507372f0b1b963James DongqDeltaRow23         QN  Q1.S16
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong
1790c1bc742181ded4930842b46e9507372f0b1b963James DongdPredValRow01       DN  D4.U8
1800c1bc742181ded4930842b46e9507372f0b1b963James DongdPredValRow23       DN  D5.U8
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong
1820c1bc742181ded4930842b46e9507372f0b1b963James DongqSumRow01           QN  Q3.S16
1830c1bc742181ded4930842b46e9507372f0b1b963James DongqSumRow23           QN  Q4.S16
1840c1bc742181ded4930842b46e9507372f0b1b963James DongdDstRow01           DN  D0.U8
1850c1bc742181ded4930842b46e9507372f0b1b963James DongdDstRow23           DN  D1.U8
1860c1bc742181ded4930842b46e9507372f0b1b963James DongdDstRow0            DN  D0.32[0]
1870c1bc742181ded4930842b46e9507372f0b1b963James DongdDstRow1            DN  D0.32[1]
1880c1bc742181ded4930842b46e9507372f0b1b963James DongdDstRow2            DN  D1.32[0]
1890c1bc742181ded4930842b46e9507372f0b1b963James DongdDstRow3            DN  D1.32[1]
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// Allocate stack memory required by the function
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pBuffer, 32
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// Write function header
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   predStepOnStack, 4
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   dstStepOnStack,4
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   QPOnStack, 4
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   ACOnStack,4
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pDelta,pBuffer
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   AC,ACOnStack
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Save registers r1,r2,r3 before function call
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     pPredTemp,pPred
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     pDCTemp,pDC
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     pDstTemp,pDst
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP     AC,#0
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ     DCcase
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_UnpackBlock4x4
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_UnpackBlock4x4
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//--------------------------------------------------------
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// armVCM4P10_DequantLumaAC4x4 : static function inlined
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//--------------------------------------------------------
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//BL      armVCM4P10_DequantLumaAC4x4
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   QP,QPOnStack                                ;// Set up r1 for armVCM4P10_DequantLumaAC4x4
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR    pQPmod,=armVCM4P10_QPModuloTable
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR    pQPdiv,=armVCM4P10_QPDivTable
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR    pVRow,=armVCM4P10_VMatrixU16
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRSB  QPmod,[pQPmod,QP]                    ;// (QP%6) * 6
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRSB  shift,[pQPdiv,QP]                    ;// Shift = QP / 6
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR    index1,=0x03020504
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR    index0,=0x05040100                   ;// Indexes into dVmatrix
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD    pVRow,pVRow,QPmod
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP   dindexRow0,index0
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP   dindexRow1,index1
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP   dShift,shift
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load all 4x4 pVRow[] values
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1   dVmatrix,[pVRow]                     ;// dVmatrix = [0d|0c|0b|0a]
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTBL   dVRow0,dVmatrix,dByteIndexRow0       ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]]
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTBL   dVRow1,dVmatrix,dByteIndexRow1       ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]]
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP     pDCTemp,#0
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load all the 4x4 'src' values
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1   { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta]
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL   dVRow0U16,dVRow0U16,dShift
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL   dVRow1U16,dVRow1U16,dShift
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRSHNE DCval,[pDCTemp]
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Multiply src[] with pVRow[]
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMUL    dDqntRow0,dSrcRow0,dVRow0U16
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMUL    dDqntRow1,dSrcRow1,dVRow1U16
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMUL    dDqntRow2,dSrcRow2,dVRow2U16
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMUL    dDqntRow3,dSrcRow3,dVRow3U16
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//-------------------------------------------------------------
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// TransformResidual4x4 : Inlined to avoid Load/Stores
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//-------------------------------------------------------------
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//BL      armVCM4P10_TransformResidual4x4
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//STRHNE  DCval,[pDelta]
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOVNE    dIn0[0],DCval
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//*****************************************************************
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Transpose the input pixels : perform Row ops as Col ops
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//*****************************************************************
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN    dIn0,dIn1
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN    dIn2,dIn3
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN    qIn01,qIn23
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV    dZero,#0                                    ;// Used to right shift by 1
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//****************************************
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Row Operations (Performed on columns)
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//****************************************
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        de0,dIn0,dIn2                       ;//  e0 = d0 + d2
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        de1,dIn0,dIn2                        ;//  e1 = d0 - d2
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       dIn1RS,dIn1,dZero                   ;// (f1>>1) constZero is a register holding 0
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       dIn3RS,dIn3,dZero
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        de2,dIn1RS,dIn3                     ;//  e2 = (d1>>1) - d3
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        de3,dIn1,dIn3RS                        ;//  e3 = d1 + (d3>>1)
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        df0,de0,de3                         ;//  f0 = e0 + e3
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        df1,de1,de2                            ;//  f1 = e1 + e2
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        df2,de1,de2                            ;//  f2 = e1 - e2
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        df3,de0,de3                            ;//  f3 = e0 - e3
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//*****************************************************************
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Transpose the resultant matrix
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//*****************************************************************
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN    df0,df1
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN    df2,df3
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN    qf01,qf23
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//*******************************
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Coloumn Operations
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//*******************************
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        dg0,df0,df2                         ;//  e0 = d0 + d2
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        dg1,df0,df2                            ;//  e1 = d0 - d2
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       df1RS,df1,dZero                     ;// (f1>>1) constZero is a register holding 0
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       df3RS,df3,dZero
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        dg2,df1RS,df3                       ;//  e2 = (d1>>1) - d3
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        dg3,df1,df3RS                        ;//  e3 = d1 + (d3>>1)
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        dh0,dg0,dg3                         ;//  f0 = e0 + e3
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        dh1,dg1,dg2                            ;//  f1 = e1 + e2
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        dh2,dg1,dg2                            ;//  f2 = e1 - e2
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        dh3,dg0,dg3                            ;//  f3 = e0 - e3
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//************************************************
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate final value (colOp[i][j] + 32)>>6
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//************************************************
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR       dh0,#6
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR       dh1,#6
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR       dh2,#6
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR       dh3,#6
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       OutDCcase
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong
3460c1bc742181ded4930842b46e9507372f0b1b963James DongDCcase
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate the Transformed DCvalue : (DCval+32)>>6
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRSH   DCval,[pDCTemp]
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     DCval,DCval,#32
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR     DCval,DCval,#6
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    dDeltaRow0, DCval                       ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3] = DCval
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    dDeltaRow1, DCval                        ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7] = DCval
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    dDeltaRow2, DCval                        ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    dDeltaRow3, DCval
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong
3580c1bc742181ded4930842b46e9507372f0b1b963James DongOutDCcase
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   predstep,predStepOnStack
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   dstStep,dstStepOnStack
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     PredVal1,[pPredTemp],predstep
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     PredVal2,[pPredTemp],predstep
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV    dPredValRow01,PredVal1,PredVal2
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     PredVal1,[pPredTemp],predstep
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     PredVal2,[pPredTemp]
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV    dPredValRow23,PredVal1,PredVal2
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADDW   qSumRow01,qDeltaRow01,dPredValRow01
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADDW   qSumRow23,qDeltaRow23,dPredValRow23
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQMOVUN dDstRow01,qSumRow01
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQMOVUN dDstRow23,qSumRow23
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    dDstRow0,[pDstTemp],dstStep
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    dDstRow1,[pDstTemp],dstStep
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    dDstRow2,[pDstTemp],dstStep
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    dDstRow3,[pDstTemp]
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Set return value
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     result,#OMX_Sts_NoErr
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong
3850c1bc742181ded4930842b46e9507372f0b1b963James DongEnd
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Write function tail
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF                                                    ;//CORTEXA8
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
397