omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
10c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited 378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License"); 578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License. 678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at 778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// http://www.apache.org/licenses/LICENSE-2.0 978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software 1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS, 1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and 1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License. 
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name: omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s 190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2 200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision: 12290 210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date: Wednesday, April 9, 2008 220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 240c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 260c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Description: 270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// H.264 inverse quantize and transform module 280c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 300c1bc742181ded4930842b46e9507372f0b1b963James Dong 310c1bc742181ded4930842b46e9507372f0b1b963James Dong 320c1bc742181ded4930842b46e9507372f0b1b963James Dong 330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Include standard headers 340c1bc742181ded4930842b46e9507372f0b1b963James Dong 350c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE omxtypes_s.h 360c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE armCOMM_s.h 370c1bc742181ded4930842b46e9507372f0b1b963James Dong 380c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Import symbols required from other files 390c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (For example tables) 400c1bc742181ded4930842b46e9507372f0b1b963James Dong 410c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_UnpackBlock4x4 420c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_TransformResidual4x4 430c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT 
armVCM4P10_QPDivTable 440c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_VMatrixU16 450c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_QPModuloTable 460c1bc742181ded4930842b46e9507372f0b1b963James Dong 470c1bc742181ded4930842b46e9507372f0b1b963James Dong M_VARIANTS CortexA8 480c1bc742181ded4930842b46e9507372f0b1b963James Dong 490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Set debugging level 500c1bc742181ded4930842b46e9507372f0b1b963James Dong;//DEBUG_ON SETL {TRUE} 510c1bc742181ded4930842b46e9507372f0b1b963James Dong 520c1bc742181ded4930842b46e9507372f0b1b963James Dong 530c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Static Function: armVCM4P10_DequantLumaAC4x4 540c1bc742181ded4930842b46e9507372f0b1b963James Dong 550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Guarding implementation by the processor name 560c1bc742181ded4930842b46e9507372f0b1b963James Dong 570c1bc742181ded4930842b46e9507372f0b1b963James Dong 580c1bc742181ded4930842b46e9507372f0b1b963James Dong 590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Guarding implementation by the processor name 600c1bc742181ded4930842b46e9507372f0b1b963James Dong 610c1bc742181ded4930842b46e9507372f0b1b963James Dong 620c1bc742181ded4930842b46e9507372f0b1b963James Dong 630c1bc742181ded4930842b46e9507372f0b1b963James Dong 640c1bc742181ded4930842b46e9507372f0b1b963James Dong 650c1bc742181ded4930842b46e9507372f0b1b963James Dong 660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd 670c1bc742181ded4930842b46e9507372f0b1b963James Dong 680c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Guarding implementation by the processor name 690c1bc742181ded4930842b46e9507372f0b1b963James Dong 700c1bc742181ded4930842b46e9507372f0b1b963James Dong 710c1bc742181ded4930842b46e9507372f0b1b963James Dong 720c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Function: 
;// omxVCM4P10_DequantTransformResidualFromPairAndAdd
;//
;// Reconstructs one 4x4 block: dst = saturate_u8(pred + residual).
;//
;// Arguments (as named by the register aliases and M_ARG declarations below):
;//   r0      ppSrc     passed through unchanged in r0 to armVCM4P10_UnpackBlock4x4
;//   r1      pPred     prediction rows, 4 bytes read per row
;//   r2      pDC       optional pointer to a signed 16-bit DC value (may be NULL)
;//   r3      pDst      destination rows, 4 bytes written per row
;//   stack   predStep  byte step between prediction rows
;//   stack   dstStep   byte step between destination rows
;//   stack   QP        index into armVCM4P10_QPModuloTable / armVCM4P10_QPDivTable
;//   stack   AC        non-zero selects the unpack + dequant + transform path
;//
;// Returns: r0 = OMX_Sts_NoErr (no argument validation is performed here).
;//
;// Paths:
;//   AC != 0 : armVCM4P10_UnpackBlock4x4 is called with r1 = pDelta (32-byte
;//             stack buffer); the 16 values read back from pDelta are
;//             multiplied by the QP-selected scale row shifted left by
;//             QPDivTable[QP]; if pDC != NULL, coefficient [0] is replaced by
;//             *pDC; the inlined 4x4 inverse transform and a rounding shift
;//             right by 6 follow.
;//   AC == 0 : every residual value is (DC + 32) >> 6, where DC = *pDC, or 0
;//             when pDC is NULL.
;//
;// NOTE(review): armVCM4P10_UnpackBlock4x4 is defined elsewhere; this code
;// assumes it fills pDelta with 16 signed 16-bit values and preserves r4-r9.
;// NOTE(review): "r11,d9" on M_START presumably requests preservation of
;// r4-r11 and d8-d9 (d6-d9 are written below) - confirm in armCOMM_s.h.

;// Guarding implementation by the processor name

    IF  CortexA8


;// ARM Registers

;//Input Registers
ppSrc       RN  0
pPred       RN  1
pDC         RN  2
pDst        RN  3


;//Output Registers
result      RN  0

;//Local Scratch Registers

;//Registers used in armVCM4P10_DequantLumaAC4x4
pQPdiv      RN  10
pQPmod      RN  11
pVRow       RN  2
QPmod       RN  12
shift       RN  14
index0      RN  1
index1      RN  10

;//Registers used in DequantTransformResidualFromPairAndAdd
pDelta      RN  4
pDeltaTmp   RN  6
AC          RN  5                                       ;//Load from stack
pPredTemp   RN  7
pDCTemp     RN  8
pDstTemp    RN  9
pDeltaArg1  RN  1
pDeltaArg0  RN  0
QP          RN  1                                       ;//Load from stack
DCval       RN  10
predstep    RN  1
dstStep     RN  10
PredVal1    RN  3
PredVal2    RN  5




;// Neon Registers

;// Registers used in armVCM4P10_DequantLumaAC4x4

dVmatrix            DN  D6.8
dindexRow0          DN  D7.32
dindexRow1          DN  D9.32
dByteIndexRow0      DN  D7.8
dByteIndexRow1      DN  D9.8
dVRow0              DN  D8.8
dVRow1              DN  D4.8
dVRow0U16           DN  D8.U16
dVRow1U16           DN  D4.U16
dVRow2U16           DN  D8.U16
dVRow3U16           DN  D4.U16

dShift              DN  D5.U16
dSrcRow0            DN  D0.I16
dSrcRow1            DN  D1.I16
dSrcRow2            DN  D2.I16
dSrcRow3            DN  D3.I16
dDqntRow0           DN  D0.I16
dDqntRow1           DN  D1.I16
dDqntRow2           DN  D2.I16
dDqntRow3           DN  D3.I16

;// Registers used in TransformResidual4x4

;// Packed Input pixels
dIn0                DN  D0.S16
dIn1                DN  D1.S16
dIn2                DN  D2.S16
dIn3                DN  D3.S16
qIn01               QN  Q0.32
qIn23               QN  Q1.32

;// Intermediate calculations
dZero               DN  D4.S16
de0                 DN  D5.S16
de1                 DN  D6.S16
de2                 DN  D7.S16
de3                 DN  D8.S16
dIn1RS              DN  D7.S16
dIn3RS              DN  D8.S16
df0                 DN  D0.S16
df1                 DN  D1.S16
df2                 DN  D2.S16
df3                 DN  D3.S16
qf01                QN  Q0.32
qf23                QN  Q1.32
dg0                 DN  D5.S16
dg1                 DN  D6.S16
dg2                 DN  D7.S16
dg3                 DN  D8.S16
df1RS               DN  D7.S16
df3RS               DN  D8.S16

;// Output pixels
dh0                 DN  D0.S16
dh1                 DN  D1.S16
dh2                 DN  D2.S16
dh3                 DN  D3.S16

;// Registers used in DequantTransformResidualFromPairAndAdd

dDeltaRow0          DN  D0.S16
dDeltaRow1          DN  D1.S16
dDeltaRow2          DN  D2.S16
dDeltaRow3          DN  D3.S16
qDeltaRow01         QN  Q0.S16
qDeltaRow23         QN  Q1.S16

dPredValRow01       DN  D4.U8
dPredValRow23       DN  D5.U8

qSumRow01           QN  Q3.S16
qSumRow23           QN  Q4.S16
dDstRow01           DN  D0.U8
dDstRow23           DN  D1.U8
dDstRow0            DN  D0.32[0]
dDstRow1            DN  D0.32[1]
dDstRow2            DN  D1.32[0]
dDstRow3            DN  D1.32[1]


        ;// Allocate stack memory required by the function
        ;// (32 bytes = 16 x 16-bit values, the pDelta scratch block)
        M_ALLOC8 pBuffer, 32


        ;// Write function header
        M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9

        ;// Define stack arguments
        M_ARG   predStepOnStack, 4
        M_ARG   dstStepOnStack,4
        M_ARG   QPOnStack, 4
        M_ARG   ACOnStack,4


        M_ADR   pDelta,pBuffer
        M_LDR   AC,ACOnStack


        ;// Save registers r1,r2,r3 before function call
        ;// (r1 is reused as an argument/scratch register, r2 and r3 as scratch)
        MOV     pPredTemp,pPred
        MOV     pDCTemp,pDC
        MOV     pDstTemp,pDst

        CMP     AC,#0
        BEQ     DCcase
        MOV     pDeltaArg1,pDelta                       ;// Set up r1 for armVCM4P10_UnpackBlock4x4

        BL      armVCM4P10_UnpackBlock4x4

        ;//--------------------------------------------------------
        ;// armVCM4P10_DequantLumaAC4x4 : static function inlined
        ;//--------------------------------------------------------

        ;//BL armVCM4P10_DequantLumaAC4x4
        M_LDR   QP,QPOnStack                            ;// Set up r1 for armVCM4P10_DequantLumaAC4x4

        LDR     pQPmod,=armVCM4P10_QPModuloTable
        LDR     pQPdiv,=armVCM4P10_QPDivTable
        LDR     pVRow,=armVCM4P10_VMatrixU16


        LDRSB   QPmod,[pQPmod,QP]                       ;// (QP%6) * 6
        LDRSB   shift,[pQPdiv,QP]                       ;// Shift = QP / 6

        ;// Byte-index patterns for VTBL; each 32-bit pattern is duplicated into
        ;// both halves of the D register, giving four 16-bit picks per row.
        LDR     index1,=0x03020504
        LDR     index0,=0x05040100                      ;// Indexes into dVmatrix
        ADD     pVRow,pVRow,QPmod
        VDUP    dindexRow0,index0
        VDUP    dindexRow1,index1
        VDUP    dShift,shift

        ;// Load all 4x4 pVRow[] values
        VLD1    dVmatrix,[pVRow]                        ;// dVmatrix = [0d|0c|0b|0a]


        VTBL    dVRow0,dVmatrix,dByteIndexRow0          ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]]
        VTBL    dVRow1,dVmatrix,dByteIndexRow1          ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]]

        ;// Flags set here are consumed by LDRSHNE and VMOVNE below; none of the
        ;// instructions in between are flag-setting forms.
        CMP     pDCTemp,#0
        ;// Load all the 4x4 'src' values
        VLD1    { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta]

        VSHL    dVRow0U16,dVRow0U16,dShift
        VSHL    dVRow1U16,dVRow1U16,dShift
        LDRSHNE DCval,[pDCTemp]                         ;// DCval = *pDC, only when pDC != NULL


        ;// Multiply src[] with pVRow[]
        ;// (dVRow2U16/dVRow3U16 alias the same registers as dVRow0U16/dVRow1U16)
        VMUL    dDqntRow0,dSrcRow0,dVRow0U16
        VMUL    dDqntRow1,dSrcRow1,dVRow1U16
        VMUL    dDqntRow2,dSrcRow2,dVRow2U16
        VMUL    dDqntRow3,dSrcRow3,dVRow3U16



        ;//-------------------------------------------------------------
        ;// TransformResidual4x4 : Inlined to avoid Load/Stores
        ;//-------------------------------------------------------------


        ;//BL armVCM4P10_TransformResidual4x4
        ;//STRHNE DCval,[pDelta]
        VMOVNE  dIn0[0],DCval                           ;// Coefficient [0] = *pDC when pDC != NULL



        ;//*****************************************************************
        ;// Transpose the input pixels : perform Row ops as Col ops
        ;//*****************************************************************

        VTRN    dIn0,dIn1
        VTRN    dIn2,dIn3
        VTRN    qIn01,qIn23


        VMOV    dZero,#0                                ;// Used to right shift by 1


        ;//****************************************
        ;// Row Operations (Performed on columns)
        ;//****************************************


        VADD    de0,dIn0,dIn2                           ;// e0 = d0 + d2
        VSUB    de1,dIn0,dIn2                           ;// e1 = d0 - d2
        VHADD   dIn1RS,dIn1,dZero                       ;// (d1>>1) : halving add with dZero, a register holding 0
        VHADD   dIn3RS,dIn3,dZero                       ;// (d3>>1)
        VSUB    de2,dIn1RS,dIn3                         ;// e2 = (d1>>1) - d3
        VADD    de3,dIn1,dIn3RS                         ;// e3 = d1 + (d3>>1)
        VADD    df0,de0,de3                             ;// f0 = e0 + e3
        VADD    df1,de1,de2                             ;// f1 = e1 + e2
        VSUB    df2,de1,de2                             ;// f2 = e1 - e2
        VSUB    df3,de0,de3                             ;// f3 = e0 - e3



        ;//*****************************************************************
        ;// Transpose the resultant matrix
        ;//*****************************************************************

        VTRN    df0,df1
        VTRN    df2,df3
        VTRN    qf01,qf23


        ;//*******************************
        ;// Column Operations
        ;//*******************************


        VADD    dg0,df0,df2                             ;// g0 = f0 + f2
        VSUB    dg1,df0,df2                             ;// g1 = f0 - f2
        VHADD   df1RS,df1,dZero                         ;// (f1>>1) : halving add with dZero, a register holding 0
        VHADD   df3RS,df3,dZero                         ;// (f3>>1)
        VSUB    dg2,df1RS,df3                           ;// g2 = (f1>>1) - f3
        VADD    dg3,df1,df3RS                           ;// g3 = f1 + (f3>>1)
        VADD    dh0,dg0,dg3                             ;// h0 = g0 + g3
        VADD    dh1,dg1,dg2                             ;// h1 = g1 + g2
        VSUB    dh2,dg1,dg2                             ;// h2 = g1 - g2
        VSUB    dh3,dg0,dg3                             ;// h3 = g0 - g3


        ;//************************************************
        ;// Calculate final value (colOp[i][j] + 32)>>6
        ;//************************************************

        VRSHR   dh0,#6
        VRSHR   dh1,#6
        VRSHR   dh2,#6
        VRSHR   dh3,#6


        B       OutDCcase


DCcase
        ;// Calculate the Transformed DCvalue : (DCval+32)>>6
        ;// pDC is optional (the AC path above tests it for NULL), so do not
        ;// dereference it blindly: with no DC value the residual is zero and
        ;// the output equals the prediction.
        MOV     DCval,#0
        CMP     pDCTemp,#0
        LDRSHNE DCval,[pDCTemp]
        ADD     DCval,DCval,#32
        ASR     DCval,DCval,#6

        VDUP    dDeltaRow0, DCval                       ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3]  = DCval
        VDUP    dDeltaRow1, DCval                       ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7]  = DCval
        VDUP    dDeltaRow2, DCval                       ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
        VDUP    dDeltaRow3, DCval                       ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval


OutDCcase
        ;// Residual rows are in D0-D3 (Q0,Q1) on both paths from here on.
        M_LDR   predstep,predStepOnStack
        M_LDR   dstStep,dstStepOnStack

        ;// Gather four 4-byte prediction rows, two rows per D register
        LDR     PredVal1,[pPredTemp],predstep
        LDR     PredVal2,[pPredTemp],predstep
        VMOV    dPredValRow01,PredVal1,PredVal2

        LDR     PredVal1,[pPredTemp],predstep
        LDR     PredVal2,[pPredTemp]
        VMOV    dPredValRow23,PredVal1,PredVal2


        ;// Widen prediction bytes, add the residual, then narrow with
        ;// unsigned saturation back to 8 bits
        VADDW   qSumRow01,qDeltaRow01,dPredValRow01
        VADDW   qSumRow23,qDeltaRow23,dPredValRow23
        VQMOVUN dDstRow01,qSumRow01
        VQMOVUN dDstRow23,qSumRow23


        ;// Store one 32-bit lane (4 pixels) per destination row
        VST1    dDstRow0,[pDstTemp],dstStep
        VST1    dDstRow1,[pDstTemp],dstStep
        VST1    dDstRow2,[pDstTemp],dstStep
        VST1    dDstRow3,[pDstTemp]

        ;// Set return value
        MOV     result,#OMX_Sts_NoErr

End


        ;// Write function tail

        M_END

    ENDIF                                               ;//CORTEXA8



    END