;//
;//
;// File Name:  omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Description:
;// H.264 inverse quantize and transform module
;//
;// Syntax: ARM RVCT armasm (symbols in column 0, ";" starts a comment).
;//

;// Include standard headers

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Import symbols required from other files
;// (For example tables)

        IMPORT armVCM4P10_UnpackBlock4x4
        IMPORT armVCM4P10_TransformResidual4x4
        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixU16
        IMPORT armVCM4P10_QPModuloTable

        M_VARIANTS CortexA8

;// Set debugging level
;//DEBUG_ON    SETL {TRUE}


;//-----------------------------------------------------------------------
;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
;//
;// Guarding implementation by the processor name (CortexA8 only).
;//
;// The static helpers armVCM4P10_DequantLumaAC4x4 and
;// armVCM4P10_TransformResidual4x4 are inlined below rather than called.
;//
;// In:    r0      = ppSrc   (passed through unchanged to
;//                            armVCM4P10_UnpackBlock4x4 when AC != 0)
;//        r1      = pPred   (4 rows, one 32-bit word read per row)
;//        r2      = pDC     (may be 0 when AC != 0; dereferenced
;//                            unconditionally when AC == 0)
;//        r3      = pDst    (4 rows, one 32-bit word written per row)
;//        stack   = predStep, dstStep, QP, AC (4 bytes each, in that order)
;// Out:   r0      = OMX_Sts_NoErr (always; no argument checking is done)
;// Stack: 32-byte scratch buffer (pBuffer) for the unpacked 4x4 block
;//
;// NOTE(review): M_START/M_END come from armCOMM_s.h; "r11,d9" presumably
;// makes them preserve r4-r11, lr and d8-d9. The body relies on that: it
;// uses r4-r12, r14 and d8/d9 and issues a BL. Confirm against the macro.
;//-----------------------------------------------------------------------

    IF CortexA8


;// ARM Registers

;//Input Registers
ppSrc           RN 0
pPred           RN 1
pDC             RN 2
pDst            RN 3


;//Output Registers
result          RN 0

;//Local Scratch Registers

;//Registers used in armVCM4P10_DequantLumaAC4x4
pQPdiv          RN 10
pQPmod          RN 11
pVRow           RN 2
QPmod           RN 12
shift           RN 14
index0          RN 1
index1          RN 10

;//Registers used in DequantTransformResidualFromPairAndAdd
pDelta          RN 4
pDeltaTmp       RN 6
AC              RN 5                    ;//Load from stack
pPredTemp       RN 7
pDCTemp         RN 8
pDstTemp        RN 9
pDeltaArg1      RN 1
pDeltaArg0      RN 0
QP              RN 1                    ;//Load from stack
DCval           RN 10
predstep        RN 1
dstStep         RN 10
PredVal1        RN 3
PredVal2        RN 5


;// Neon Registers

;// Registers used in armVCM4P10_DequantLumaAC4x4

dVmatrix        DN D6.8
dindexRow0      DN D7.32
dindexRow1      DN D9.32
dByteIndexRow0  DN D7.8
dByteIndexRow1  DN D9.8
dVRow0          DN D8.8
dVRow1          DN D4.8
dVRow0U16       DN D8.U16
dVRow1U16       DN D4.U16
dVRow2U16       DN D8.U16               ;// rows 2/3 reuse the row 0/1 registers
dVRow3U16       DN D4.U16

dShift          DN D5.U16
dSrcRow0        DN D0.I16
dSrcRow1        DN D1.I16
dSrcRow2        DN D2.I16
dSrcRow3        DN D3.I16
dDqntRow0       DN D0.I16
dDqntRow1       DN D1.I16
dDqntRow2       DN D2.I16
dDqntRow3       DN D3.I16

;// Registers used in TransformResidual4x4

;// Packed Input pixels
dIn0            DN D0.S16
dIn1            DN D1.S16
dIn2            DN D2.S16
dIn3            DN D3.S16
qIn01           QN Q0.32
qIn23           QN Q1.32

;// Intermediate calculations
dZero           DN D4.S16
de0             DN D5.S16
de1             DN D6.S16
de2             DN D7.S16
de3             DN D8.S16
dIn1RS          DN D7.S16
dIn3RS          DN D8.S16
df0             DN D0.S16
df1             DN D1.S16
df2             DN D2.S16
df3             DN D3.S16
qf01            QN Q0.32
qf23            QN Q1.32
dg0             DN D5.S16
dg1             DN D6.S16
dg2             DN D7.S16
dg3             DN D8.S16
df1RS           DN D7.S16
df3RS           DN D8.S16

;// Output pixels
dh0             DN D0.S16
dh1             DN D1.S16
dh2             DN D2.S16
dh3             DN D3.S16

;// Registers used in DequantTransformResidualFromPairAndAdd

dDeltaRow0      DN D0.S16
dDeltaRow1      DN D1.S16
dDeltaRow2      DN D2.S16
dDeltaRow3      DN D3.S16
qDeltaRow01     QN Q0.S16
qDeltaRow23     QN Q1.S16

dPredValRow01   DN D4.U8
dPredValRow23   DN D5.U8

qSumRow01       QN Q3.S16
qSumRow23       QN Q4.S16
dDstRow01       DN D0.U8
dDstRow23       DN D1.U8
dDstRow0        DN D0.32[0]
dDstRow1        DN D0.32[1]
dDstRow2        DN D1.32[0]
dDstRow3        DN D1.32[1]


        ;// Allocate stack memory required by the function
        M_ALLOC8 pBuffer, 32


        ;// Write function header
        M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9

        ;// Define stack arguments
        M_ARG   predStepOnStack, 4
        M_ARG   dstStepOnStack,4
        M_ARG   QPOnStack, 4
        M_ARG   ACOnStack,4


        M_ADR   pDelta,pBuffer
        M_LDR   AC,ACOnStack


        ;// Save registers r1,r2,r3 before function call
        ;// (r1-r3 are also reused as scratch aliases further down)
        MOV     pPredTemp,pPred
        MOV     pDCTemp,pDC
        MOV     pDstTemp,pDst

        CMP     AC,#0
        BEQ     DCcase                  ;// AC == 0: DC-only path, nothing to unpack
        MOV     pDeltaArg1,pDelta       ;// Set up r1 for armVCM4P10_UnpackBlock4x4

        BL      armVCM4P10_UnpackBlock4x4

        ;//--------------------------------------------------------
        ;// armVCM4P10_DequantLumaAC4x4 : static function inlined
        ;//--------------------------------------------------------

        ;//BL      armVCM4P10_DequantLumaAC4x4
        M_LDR   QP,QPOnStack            ;// Set up r1 for armVCM4P10_DequantLumaAC4x4

        LDR     pQPmod,=armVCM4P10_QPModuloTable
        LDR     pQPdiv,=armVCM4P10_QPDivTable
        LDR     pVRow,=armVCM4P10_VMatrixU16


        LDRSB   QPmod,[pQPmod,QP]       ;// (QP%6) * 6
        LDRSB   shift,[pQPdiv,QP]       ;// Shift = QP / 6

        ;// index1/index0 overwrite r10/r1 (pQPdiv/QP); both are dead after
        ;// the two table look-ups above.
        LDR     index1,=0x03020504
        LDR     index0,=0x05040100      ;// Indexes into dVmatrix
        ADD     pVRow,pVRow,QPmod
        VDUP    dindexRow0,index0
        VDUP    dindexRow1,index1
        VDUP    dShift,shift

        ;// Load 8 bytes of pVRow[]; the VTBL indexes below only select
        ;// bytes 0..5 of them
        VLD1    dVmatrix,[pVRow]


        VTBL    dVRow0,dVmatrix,dByteIndexRow0  ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]]
        VTBL    dVRow1,dVmatrix,dByteIndexRow1  ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]]

        ;// Flags set here are consumed by LDRSHNE and VMOVNE below; none of
        ;// the instructions in between update the condition flags.
        CMP     pDCTemp,#0
        ;// Load all the 4x4 'src' values
        VLD1    { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta]

        VSHL    dVRow0U16,dVRow0U16,dShift
        VSHL    dVRow1U16,dVRow1U16,dShift
        LDRSHNE DCval,[pDCTemp]         ;// only read *pDC when pDC != 0


        ;// Multiply src[] with pVRow[]
        VMUL    dDqntRow0,dSrcRow0,dVRow0U16
        VMUL    dDqntRow1,dSrcRow1,dVRow1U16
        VMUL    dDqntRow2,dSrcRow2,dVRow2U16
        VMUL    dDqntRow3,dSrcRow3,dVRow3U16



        ;//-------------------------------------------------------------
        ;// TransformResidual4x4 : Inlined to avoid Load/Stores
        ;//-------------------------------------------------------------


        ;//BL      armVCM4P10_TransformResidual4x4
        ;//STRHNE  DCval,[pDelta]
        VMOVNE  dIn0[0],DCval           ;// pDC != 0: coefficient [0][0] is replaced by *pDC



        ;//*****************************************************************
        ;// Transpose the input pixels : perform Row ops as Col ops
        ;//*****************************************************************

        VTRN    dIn0,dIn1
        VTRN    dIn2,dIn3
        VTRN    qIn01,qIn23


        VMOV    dZero,#0                ;// Used to right shift by 1 (via VHADD with zero)


        ;//****************************************
        ;// Row Operations (Performed on columns)
        ;//****************************************


        VADD    de0,dIn0,dIn2           ;// e0 = d0 + d2
        VSUB    de1,dIn0,dIn2           ;// e1 = d0 - d2
        VHADD   dIn1RS,dIn1,dZero       ;// (d1>>1) : halving add with the zero register
        VHADD   dIn3RS,dIn3,dZero       ;// (d3>>1)
        VSUB    de2,dIn1RS,dIn3         ;// e2 = (d1>>1) - d3
        VADD    de3,dIn1,dIn3RS         ;// e3 = d1 + (d3>>1)
        VADD    df0,de0,de3             ;// f0 = e0 + e3
        VADD    df1,de1,de2             ;// f1 = e1 + e2
        VSUB    df2,de1,de2             ;// f2 = e1 - e2
        VSUB    df3,de0,de3             ;// f3 = e0 - e3



        ;//*****************************************************************
        ;// Transpose the resultant matrix
        ;//*****************************************************************

        VTRN    df0,df1
        VTRN    df2,df3
        VTRN    qf01,qf23


        ;//*******************************
        ;// Column Operations
        ;//*******************************


        VADD    dg0,df0,df2             ;// g0 = f0 + f2
        VSUB    dg1,df0,df2             ;// g1 = f0 - f2
        VHADD   df1RS,df1,dZero         ;// (f1>>1) : halving add with the zero register
        VHADD   df3RS,df3,dZero         ;// (f3>>1)
        VSUB    dg2,df1RS,df3           ;// g2 = (f1>>1) - f3
        VADD    dg3,df1,df3RS           ;// g3 = f1 + (f3>>1)
        VADD    dh0,dg0,dg3             ;// h0 = g0 + g3
        VADD    dh1,dg1,dg2             ;// h1 = g1 + g2
        VSUB    dh2,dg1,dg2             ;// h2 = g1 - g2
        VSUB    dh3,dg0,dg3             ;// h3 = g0 - g3


        ;//************************************************
        ;// Calculate final value (colOp[i][j] + 32)>>6
        ;//************************************************

        VRSHR   dh0,#6
        VRSHR   dh1,#6
        VRSHR   dh2,#6
        VRSHR   dh3,#6


        B       OutDCcase


DCcase
        ;// Calculate the Transformed DCvalue : (DCval+32)>>6
        ;// pDC is dereferenced without a null check on this path.
        LDRSH   DCval,[pDCTemp]
        ADD     DCval,DCval,#32
        ASR     DCval,DCval,#6

        VDUP    dDeltaRow0, DCval       ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3]  = DCval
        VDUP    dDeltaRow1, DCval       ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7]  = DCval
        VDUP    dDeltaRow2, DCval       ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
        VDUP    dDeltaRow3, DCval       ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval


OutDCcase
        ;// Both paths arrive here with the 4x4 residual in D0-D3 (Q0,Q1).
        M_LDR   predstep,predStepOnStack
        M_LDR   dstStep,dstStepOnStack

        ;// Each prediction row is fetched as one 32-bit word (4 pixels);
        ;// two rows are packed into one D register.
        LDR     PredVal1,[pPredTemp],predstep
        LDR     PredVal2,[pPredTemp],predstep
        VMOV    dPredValRow01,PredVal1,PredVal2

        LDR     PredVal1,[pPredTemp],predstep
        LDR     PredVal2,[pPredTemp]
        VMOV    dPredValRow23,PredVal1,PredVal2


        ;// sum = residual + prediction (prediction widened from 8 to 16 bits),
        ;// then narrow back to 8 bits with unsigned saturation.
        VADDW   qSumRow01,qDeltaRow01,dPredValRow01
        VADDW   qSumRow23,qDeltaRow23,dPredValRow23
        VQMOVUN dDstRow01,qSumRow01
        VQMOVUN dDstRow23,qSumRow23


        ;// Store one 32-bit lane (4 pixels) per destination row
        VST1    dDstRow0,[pDstTemp],dstStep
        VST1    dDstRow1,[pDstTemp],dstStep
        VST1    dDstRow2,[pDstTemp],dstStep
        VST1    dDstRow3,[pDstTemp]

        ;// Set return value
        MOV     result,#OMX_Sts_NoErr

End


        ;// Write function tail

        M_END

    ENDIF                               ;//CORTEXA8



        END