10c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007 ARM Limited 378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License"); 578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License. 678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at 778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// http://www.apache.org/licenses/LICENSE-2.0 978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software 1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS, 1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and 1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License. 
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Description: 190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// H.264 inverse quantize and transform module 200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 220c1bc742181ded4930842b46e9507372f0b1b963James Dong 230c1bc742181ded4930842b46e9507372f0b1b963James Dong 240c1bc742181ded4930842b46e9507372f0b1b963James Dong 250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Include standard headers 260c1bc742181ded4930842b46e9507372f0b1b963James Dong 270c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE omxtypes_s.h 280c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE armCOMM_s.h 290c1bc742181ded4930842b46e9507372f0b1b963James Dong 300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Import symbols required from other files 310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (For example tables) 320c1bc742181ded4930842b46e9507372f0b1b963James Dong 330c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_UnpackBlock4x4 340c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_TransformResidual4x4 350c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_QPDivTable 360c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_VMatrixU16 370c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_QPModuloTable 380c1bc742181ded4930842b46e9507372f0b1b963James Dong 390c1bc742181ded4930842b46e9507372f0b1b963James Dong M_VARIANTS ARM1136JS, ARM1136JS_U 400c1bc742181ded4930842b46e9507372f0b1b963James Dong 410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Set debugging level 420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//DEBUG_ON SETL {TRUE} 430c1bc742181ded4930842b46e9507372f0b1b963James Dong 
;//-----------------------------------------------------------------------------
;// Static Function: armVCM4P10_DequantLumaAC4x4
;//
;// Scales a 4x4 block of 16-bit coefficients in place:
;//
;//     pSrcDst[i] = pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift)
;//
;// with Shift = QP / 6 (from armVCM4P10_QPDivTable) and pVRow the row of
;// armVCM4P10_VMatrixU16 found at byte offset (QP % 6) * 6 (the offset comes
;// from armVCM4P10_QPModuloTable).
;//
;// In:   r0 = pSrcDst   16 halfwords, loaded as 8 words and stored back
;//       r1 = QP
;// Out:  nothing is written to r0; the block behind pSrcDst is updated
;//
;// M_START / M_END are macros from armCOMM_s.h.
;// NOTE(review): the 'r11' argument presumably requests save/restore of
;// r4-r11 (and lr, since r14 is used as scratch below) - confirm in the macro.
;//
;// Two builds of the routine follow, selected by M_VARIANTS; they differ only
;// in how the three scale factors are fetched from the table.
;//-----------------------------------------------------------------------------

;// Guarding implementation by the processor name

    IF  ARM1136JS

;// ---- Arguments
pSrcDst         RN  0
QP              RN  1

;// ---- Table pointers and lookup results.
;//      Their registers are recycled as soon as the scale factors are built.
pVRow           RN  2
shift           RN  3
pQPdiv          RN  4
pQPmod          RN  5
QPmod           RN  6

;// ---- Packed scale factors (rowLuma01 takes over r1 from QP,
;//      rowLuma23 takes over r4 from pQPdiv)
rowLuma01       RN  1
rowLuma23       RN  4

;// ---- Coefficients, two halfwords per register: SrcDst<row><first column>
SrcDst00        RN  5
SrcDst02        RN  6
SrcDst10        RN  7
SrcDst12        RN  8
SrcDst20        RN  9
SrcDst22        RN  10
SrcDst30        RN  11
SrcDst32        RN  12

;// ---- Holders for the odd-index products until they are packed
temp1           RN  2
temp2           RN  3
temp3           RN  14

        ;// No stack buffer is allocated for this function

        M_START armVCM4P10_DequantLumaAC4x4,r11

        ;// Addresses of the three lookup tables
        LDR     pQPmod,=armVCM4P10_QPModuloTable
        LDR     pQPdiv,=armVCM4P10_QPDivTable
        LDR     pVRow,=armVCM4P10_VMatrixU16

        ;// Both tables are indexed by QP and hold signed bytes
        LDRSB   QPmod,[pQPmod,QP]                       ;// QPmod = (QP % 6) * 6
        LDRSB   shift,[pQPdiv,QP]                       ;// shift = QP / 6

        ;// Fetch the three factors a,b,c of the selected row one halfword at
        ;// a time. The writeback leaves pVRow pointing at the row start, so
        ;// the next two loads use plain #2 / #4 offsets.
        LDRH    rowLuma01,[pVRow,QPmod]!                ;// rowLuma01 = [ 0 | a ]
        LDRH    temp3,[pVRow,#2]                        ;// temp3     = [ 0 | b ]
        LDRH    rowLuma23,[pVRow,#4]                    ;// rowLuma23 = [ 0 | c ]
        ORR     rowLuma01,rowLuma01,temp3,LSL #16       ;// rowLuma01 = [ b | a ]

        ;// All 16 coefficients in one go (overwrites pQPmod and QPmod)
        LDMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}

        ;//---------------------------------------------------------------------
        ;// shift lies in [0,8], and each factor shifted by it still fits in
        ;// 16 bits, so a single LSL scales both halves of the packed [b|a]
        ;// word without the low half spilling into the high half.
        ;//---------------------------------------------------------------------
        LSL     rowLuma01,rowLuma01,shift               ;// [ b<<shift | a<<shift ]
        LSL     rowLuma23,rowLuma23,shift               ;// [ 0        | c<<shift ]

        ;//---------------------------------------------------------------------
        ;// Fully unrolled multiply.
        ;//   0 <= armVCM4P10_PosToVCol4x4[i] <= 2 for every i < 16, so only
        ;//   pVRow[0..2] (a, b, c above) are ever needed.
        ;//   Per register the top-half product goes to a temp, the bottom-half
        ;//   product overwrites the register, and PKHBT then merges the low
        ;//   16 bits of both into one word.
        ;//   Each PKHBT is placed after multiplies belonging to the following
        ;//   pair/row (original author's intent: avoid pipeline stalls), so
        ;//   the instruction order below is deliberate.
        ;//---------------------------------------------------------------------

        ;// Row 1 : factors a, c, a, c
        SMULTB  temp1,SrcDst00,rowLuma23                ;// pSrcDst[1] * (pVRow[2]<<Shift)
        SMULBB  SrcDst00,SrcDst00,rowLuma01             ;// pSrcDst[0] * (pVRow[0]<<Shift)

        SMULTB  temp2,SrcDst02,rowLuma23                ;// pSrcDst[3] * (pVRow[2]<<Shift)
        SMULBB  SrcDst02,SrcDst02,rowLuma01             ;// pSrcDst[2] * (pVRow[0]<<Shift)

        PKHBT   SrcDst00,SrcDst00,temp1,LSL #16         ;// word 0 = products [1|0]

        ;// Row 2 : factors c, b, c, b
        SMULTT  temp1,SrcDst10,rowLuma01                ;// pSrcDst[5] * (pVRow[1]<<Shift)
        SMULBB  SrcDst10,SrcDst10,rowLuma23             ;// pSrcDst[4] * (pVRow[2]<<Shift)

        PKHBT   SrcDst02,SrcDst02,temp2,LSL #16         ;// word 1 = products [3|2]
        SMULTT  temp2,SrcDst12,rowLuma01                ;// pSrcDst[7] * (pVRow[1]<<Shift)
        SMULBB  SrcDst12,SrcDst12,rowLuma23             ;// pSrcDst[6] * (pVRow[2]<<Shift)

        PKHBT   SrcDst10,SrcDst10,temp1,LSL #16         ;// word 2 = products [5|4]

        ;// Row 3 : factors a, c, a, c
        SMULTB  temp1,SrcDst20,rowLuma23                ;// pSrcDst[9] * (pVRow[2]<<Shift)
        SMULBB  SrcDst20,SrcDst20,rowLuma01             ;// pSrcDst[8] * (pVRow[0]<<Shift)

        PKHBT   SrcDst12,SrcDst12,temp2,LSL #16         ;// word 3 = products [7|6]
        SMULTB  temp2,SrcDst22,rowLuma23                ;// pSrcDst[11] * (pVRow[2]<<Shift)
        SMULBB  SrcDst22,SrcDst22,rowLuma01             ;// pSrcDst[10] * (pVRow[0]<<Shift)

        PKHBT   SrcDst20,SrcDst20,temp1,LSL #16         ;// word 4 = products [9|8]

        ;// Row 4 : factors c, b, c, b
        SMULTT  temp1,SrcDst30,rowLuma01                ;// pSrcDst[13] * (pVRow[1]<<Shift)
        SMULBB  SrcDst30,SrcDst30,rowLuma23             ;// pSrcDst[12] * (pVRow[2]<<Shift)

        SMULTT  temp3,SrcDst32,rowLuma01                ;// pSrcDst[15] * (pVRow[1]<<Shift)
        SMULBB  SrcDst32,SrcDst32,rowLuma23             ;// pSrcDst[14] * (pVRow[2]<<Shift)

        PKHBT   SrcDst22,SrcDst22,temp2,LSL #16         ;// word 5 = products [11|10]
        PKHBT   SrcDst30,SrcDst30,temp1,LSL #16         ;// word 6 = products [13|12]
        PKHBT   SrcDst32,SrcDst32,temp3,LSL #16         ;// word 7 = products [15|14]

        ;// Write the 16 scaled coefficients back over the input
        STMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}

        ;// No return value is set up here

        ;// Write function tail
        M_END

    ENDIF                                               ;//ARM1136JS


;// Guarding implementation by the processor name

    IF  ARM1136JS_U

;// ---- Arguments
pSrcDst         RN  0
QP              RN  1

;// ---- Table pointers and lookup results.
;//      Their registers are recycled as soon as the scale factors are built.
pVRow           RN  2
shift           RN  3
pQPdiv          RN  4
pQPmod          RN  5
QPmod           RN  6

;// ---- Packed scale factors (rowLuma01 takes over r1 from QP,
;//      rowLuma23 takes over r4 from pQPdiv)
rowLuma01       RN  1
rowLuma23       RN  4

;// ---- Coefficients, two halfwords per register: SrcDst<row><first column>
SrcDst00        RN  5
SrcDst02        RN  6
SrcDst10        RN  7
SrcDst12        RN  8
SrcDst20        RN  9
SrcDst22        RN  10
SrcDst30        RN  11
SrcDst32        RN  12

;// ---- Holders for the odd-index products until they are packed
temp1           RN  2
temp2           RN  3
temp3           RN  14

        ;// No stack buffer is allocated for this function

        M_START armVCM4P10_DequantLumaAC4x4,r11

        ;// Addresses of the three lookup tables
        LDR     pQPmod,=armVCM4P10_QPModuloTable
        LDR     pQPdiv,=armVCM4P10_QPDivTable
        LDR     pVRow,=armVCM4P10_VMatrixU16

        ;// Both tables are indexed by QP and hold signed bytes
        LDRSB   QPmod,[pQPmod,QP]                       ;// QPmod = (QP % 6) * 6
        LDRSB   shift,[pQPdiv,QP]                       ;// shift = QP / 6

        ;// Fetch the factors with two word loads instead of three halfword
        ;// loads. The row offset is a multiple of 6, so the address is not
        ;// always word aligned.
        ;// NOTE(review): this presumably relies on the _U variant being built
        ;// only where unaligned word loads are permitted - confirm.
        ;// NOTE(review): the second load also picks up the halfword following
        ;// the row ('d'). It is never used (every multiply below selects the
        ;// bottom half of rowLuma23), but for the last row it lies beyond the
        ;// row data - check what follows the table in memory.
        LDR     rowLuma01,[pVRow,QPmod]!                ;// rowLuma01 = [ b | a ]
        LDR     rowLuma23,[pVRow,#4]                    ;// rowLuma23 = [ d | c ]

        ;// All 16 coefficients in one go (overwrites pQPmod and QPmod)
        LDMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}

        ;//---------------------------------------------------------------------
        ;// shift lies in [0,8], and each factor shifted by it still fits in
        ;// 16 bits, so a single LSL scales both halves of the packed [b|a]
        ;// word without the low half spilling into the high half.
        ;// For rowLuma23 only the bottom half (c<<shift) matters afterwards.
        ;//---------------------------------------------------------------------
        LSL     rowLuma01,rowLuma01,shift               ;// [ b<<shift | a<<shift ]
        LSL     rowLuma23,rowLuma23,shift               ;// [ unused   | c<<shift ]

        ;//---------------------------------------------------------------------
        ;// Fully unrolled multiply.
        ;//   0 <= armVCM4P10_PosToVCol4x4[i] <= 2 for every i < 16, so only
        ;//   pVRow[0..2] (a, b, c above) are ever needed.
        ;//   Per register the top-half product goes to a temp, the bottom-half
        ;//   product overwrites the register, and PKHBT then merges the low
        ;//   16 bits of both into one word.
        ;//   Each PKHBT is placed after multiplies belonging to the following
        ;//   pair/row (original author's intent: avoid pipeline stalls), so
        ;//   the instruction order below is deliberate.
        ;//---------------------------------------------------------------------

        ;// Row 1 : factors a, c, a, c
        SMULTB  temp1,SrcDst00,rowLuma23                ;// pSrcDst[1] * (pVRow[2]<<Shift)
        SMULBB  SrcDst00,SrcDst00,rowLuma01             ;// pSrcDst[0] * (pVRow[0]<<Shift)

        SMULTB  temp2,SrcDst02,rowLuma23                ;// pSrcDst[3] * (pVRow[2]<<Shift)
        SMULBB  SrcDst02,SrcDst02,rowLuma01             ;// pSrcDst[2] * (pVRow[0]<<Shift)

        PKHBT   SrcDst00,SrcDst00,temp1,LSL #16         ;// word 0 = products [1|0]

        ;// Row 2 : factors c, b, c, b
        SMULTT  temp1,SrcDst10,rowLuma01                ;// pSrcDst[5] * (pVRow[1]<<Shift)
        SMULBB  SrcDst10,SrcDst10,rowLuma23             ;// pSrcDst[4] * (pVRow[2]<<Shift)

        PKHBT   SrcDst02,SrcDst02,temp2,LSL #16         ;// word 1 = products [3|2]
        SMULTT  temp2,SrcDst12,rowLuma01                ;// pSrcDst[7] * (pVRow[1]<<Shift)
        SMULBB  SrcDst12,SrcDst12,rowLuma23             ;// pSrcDst[6] * (pVRow[2]<<Shift)

        PKHBT   SrcDst10,SrcDst10,temp1,LSL #16         ;// word 2 = products [5|4]

        ;// Row 3 : factors a, c, a, c
        SMULTB  temp1,SrcDst20,rowLuma23                ;// pSrcDst[9] * (pVRow[2]<<Shift)
        SMULBB  SrcDst20,SrcDst20,rowLuma01             ;// pSrcDst[8] * (pVRow[0]<<Shift)

        PKHBT   SrcDst12,SrcDst12,temp2,LSL #16         ;// word 3 = products [7|6]
        SMULTB  temp2,SrcDst22,rowLuma23                ;// pSrcDst[11] * (pVRow[2]<<Shift)
        SMULBB  SrcDst22,SrcDst22,rowLuma01             ;// pSrcDst[10] * (pVRow[0]<<Shift)

        PKHBT   SrcDst20,SrcDst20,temp1,LSL #16         ;// word 4 = products [9|8]

        ;// Row 4 : factors c, b, c, b
        SMULTT  temp1,SrcDst30,rowLuma01                ;// pSrcDst[13] * (pVRow[1]<<Shift)
        SMULBB  SrcDst30,SrcDst30,rowLuma23             ;// pSrcDst[12] * (pVRow[2]<<Shift)

        SMULTT  temp3,SrcDst32,rowLuma01                ;// pSrcDst[15] * (pVRow[1]<<Shift)
        SMULBB  SrcDst32,SrcDst32,rowLuma23             ;// pSrcDst[14] * (pVRow[2]<<Shift)

        PKHBT   SrcDst22,SrcDst22,temp2,LSL #16         ;// word 5 = products [11|10]
        PKHBT   SrcDst30,SrcDst30,temp1,LSL #16         ;// word 6 = products [13|12]
        PKHBT   SrcDst32,SrcDst32,temp3,LSL #16         ;// word 7 = products [15|14]

        ;// Write the 16 scaled coefficients back over the input
        STMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}

        ;// No return value is set up here

        ;// Write function tail
        M_END

    ENDIF                                               ;//ARM1136JS_U





;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd

;// Guarding implementation by the processor name

    IF  ARM1136JS

;//Input Registers
ppSrc       RN  0
pPred       RN  1
pDC         RN  2
pDst        RN  3


;//Output Registers
result      RN  0

;//Local Scratch Registers
pDelta      RN  4
pDeltaTmp   RN  6
AC          RN  5
;//Load from stack
;// Register aliases for the ARM1136 implementation.
;// Several aliases deliberately share a physical register; each pair below is
;// never live at the same time:
;//   r0  : pDeltaArg0 (call argument)    / ycounter (row counter in the add loop)
;//   r1  : pDeltaArg1, QP (call args)    / predstep (loaded after the last call)
;//   r8  : pDCTemp (last read before OutDCcase) / PredVal (loaded after OutDCcase)
;//   r10 : DCval (last used before OutDCcase)   / dstStep (loaded after OutDCcase)
;//   r11 : DCvalCopy (second register of the STRD pair) / DeltaVal2
pPredTemp   RN  7
pDCTemp     RN  8
pDstTemp    RN  9
pDeltaArg1  RN  1
pDeltaArg0  RN  0
QP          RN  1                   ;//Load from stack
DCval       RN  10
DCvalCopy   RN  11
predstep    RN  1
dstStep     RN  10
ycounter    RN  0
PredVal1    RN  3
PredVal2    RN  5
DeltaVal1   RN  2
DeltaVal2   RN  11
PredVal     RN  8
tmpDeltaVal RN  6
sum1        RN  12
sum2        RN  14



    ;// Allocate stack memory required by the function
    ;// 32 bytes = 16 halfword residuals (one 4x4 block), used as pDelta
    M_ALLOC8 pBuffer, 32


    ;//-------------------------------------------------------------------
    ;// omxVCM4P10_DequantTransformResidualFromPairAndAdd
    ;//
    ;// Builds a 4x4 block of 16-bit residuals in a local buffer, adds it to
    ;// a 4x4 block of 8-bit prediction samples and stores the clipped
    ;// (0..255) result.
    ;//
    ;// Register inputs are copied from pPred, pDC and pDst; r0 is passed
    ;// through unchanged to armVCM4P10_UnpackBlock4x4.
    ;// Stack inputs, in order: predStep, dstStep, QP, AC.
    ;//
    ;//   AC != 0 : unpack the block, dequantise it with QP, overwrite
    ;//             coefficient 0 with *pDC when pDC is non-NULL, then run
    ;//             the 4x4 inverse transform in place.
    ;//   AC == 0 : every residual is ((*pDC) + 32) >> 6.
    ;//
    ;// Returns OMX_Sts_NoErr in the result register.
    ;//-------------------------------------------------------------------

    ;// Write function header
    M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11

    ;// Define stack arguments
    M_ARG   predStepOnStack, 4
    M_ARG   dstStepOnStack,4
    M_ARG   QPOnStack, 4
    M_ARG   ACOnStack,4


    M_ADR   pDelta,pBuffer                              ;// pDelta = address of the local residual buffer
    M_LDR   AC,ACOnStack


    ;// Save registers r1,r2,r3 before function call
    ;// NOTE(review): relies on the called helpers preserving r7-r9 - confirm
    MOV     pPredTemp,pPred
    MOV     pDCTemp,pDC
    MOV     pDstTemp,pDst

    CMP     AC,#0
    BEQ     DCcase                                      ;// No AC coefficients: take the DC-only path
    MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_UnpackBlock4x4

    BL      armVCM4P10_UnpackBlock4x4

    M_LDR   QP,QPOnStack                                ;// Set up r1 for DequantLumaAC4x4
    MOV     pDeltaArg0,pDelta                           ;// Set up r0 for DequantLumaAC4x4

    ;// NOTE(review): armVCM4P10_DequantLumaAC4x4 is not in the IMPORT list at
    ;// the top of the file; presumably it is defined earlier in this file
    BL      armVCM4P10_DequantLumaAC4x4


    CMP     pDCTemp,#0                                  ;// Flags from this compare guard both NE instructions below
    LDRSHNE DCval,[pDCTemp]                             ;// pDC != NULL: fetch the separately supplied DC value
    MOV     pDeltaArg0,pDelta                           ;// Set up r0 for armVCM4P10_TransformResidual4x4
    MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_TransformResidual4x4
    STRHNE  DCval,[pDelta]                              ;// pDC != NULL: pDelta[0] = *pDC

    BL      armVCM4P10_TransformResidual4x4
    B       OutDCcase


DCcase
    ;// NOTE(review): pDCTemp is dereferenced here without a NULL check;
    ;// presumably callers guarantee pDC != NULL whenever AC == 0 - confirm
    LDRSH   DCval,[pDCTemp]
    ADD     DCval,DCval,#32                             ;// Rounding offset for the >> 6 below
    ASR     DCval,DCval,#6
    PKHBT   DCval,DCval,DCval,LSL #16                   ;// Duplicating the Lower halfword
    MOV     DCvalCopy, DCval                            ;// Needed for STRD
    STRD    DCval, [pDelta, #0]                         ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3]  = DCval
    STRD    DCval, [pDelta, #8]                         ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7]  = DCval
    STRD    DCval, [pDelta, #16]                        ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
    STRD    DCval, [pDelta, #24]                        ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval


OutDCcase
    M_LDR   predstep,predStepOnStack
    M_LDR   dstStep,dstStepOnStack

    LDMIA   pDelta!,{tmpDeltaVal,DeltaVal2}             ;// Pre load
    MOV     ycounter,#4                                 ;// Counter for the PredPlusDeltaLoop
    LDR     PredVal,[pPredTemp]                         ;// Pre load

    ;// One iteration per row: 4 prediction bytes + 4 halfword residuals.
    ;// The SUBS at the top sets the flags used by LDRGT, LDMGTIA and BGT;
    ;// no instruction in between updates the condition flags, so the
    ;// conditional pre-loads are skipped on the last row.
PredPlusDeltaLoop


    SUBS    ycounter,ycounter,#1
    ADD     pPredTemp,pPredTemp,predstep                ;// Increment pPred ptr

    PKHBT   DeltaVal1,tmpDeltaVal,DeltaVal2,LSL #16     ;// Deltaval1 = [C A]
    PKHTB   DeltaVal2,DeltaVal2,tmpDeltaVal,ASR #16     ;// DeltaVal2 = [D B]

    UXTB16  PredVal1,PredVal                            ;// PredVal1 = [0c0a]
    UXTB16  PredVal2,PredVal,ROR #8                     ;// PredVal2 = [0d0b]

    LDRGT   PredVal,[pPredTemp]                         ;// Pre load

    QADD16  sum2,DeltaVal2,PredVal2                     ;// Add and saturate to 16 bits
    QADD16  sum1,DeltaVal1,PredVal1

    USAT16  sum2,#8,sum2                                ;// armClip(0,255,sum2)
    USAT16  sum1,#8,sum1

    LDMGTIA pDelta!,{tmpDeltaVal,DeltaVal2}             ;// Pre load

    ORR     sum1,sum1,sum2,LSL #8                       ;// sum1 = [dcba]
    STR     sum1,[pDstTemp]

    ADD     pDstTemp,pDstTemp,dstStep                   ;// Increment pDst ptr
    BGT     PredPlusDeltaLoop


    ;// Set return value
    MOV     result,#OMX_Sts_NoErr

End


    ;// Write function tail

    M_END

    ENDIF                                               ;//ARM1136JS


;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd

;// Guarding implementation by the processor name




    END