;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//
;// Description:
;// H.264 inverse quantize and transform module
;//
;//



;// Include standard headers

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Import symbols required from other files
;// (For example tables)

        IMPORT armVCM4P10_UnpackBlock4x4
        IMPORT armVCM4P10_TransformResidual4x4
        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixU16
        IMPORT armVCM4P10_QPModuloTable

        M_VARIANTS CortexA8

;// Set debugging level
;//DEBUG_ON    SETL {TRUE}


;//-----------------------------------------------------------------------------
;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
;//
;// Register arguments:
;//   r0  ppSrc     handed unchanged to armVCM4P10_UnpackBlock4x4
;//   r1  pPred     prediction block, read as 4 rows of 4 bytes, rows predStep apart
;//   r2  pDC       optional DC coefficient (signed 16 bit); may be NULL
;//   r3  pDst      destination block, written as 4 rows of 4 bytes, rows dstStep apart
;// Stack arguments (in this order): predStep, dstStep, QP, AC
;//
;// Behaviour:
;//   AC != 0 : the block is unpacked into a 32-byte stack buffer, every
;//             coefficient is multiplied by its armVCM4P10_VMatrixU16 scale
;//             (row selected through armVCM4P10_QPModuloTable[QP], left-shifted
;//             by armVCM4P10_QPDivTable[QP]), coefficient [0] is replaced by *pDC
;//             when pDC is not NULL, and the 4x4 inverse transform followed by a
;//             rounding shift right by 6 is applied.
;//   AC == 0 : all 16 residuals are set to (*pDC + 32) >> 6; a NULL pDC is
;//             treated as a DC coefficient of 0.
;//   The residual is then added to the prediction, saturated to unsigned 8 bit
;//   and stored to pDst.
;//
;// Returns: r0 = OMX_Sts_NoErr (no argument validation is performed here).
;//
;// The former static helpers armVCM4P10_DequantLumaAC4x4 and
;// armVCM4P10_TransformResidual4x4 are inlined in the body below.
;//-----------------------------------------------------------------------------

;// Guarding implementation by the processor name

    IF  CortexA8


;// ARM Registers

;//Input Registers
ppSrc       RN  0
pPred       RN  1
pDC         RN  2
pDst        RN  3


;//Output Registers
result      RN  0

;//Local Scratch Registers

;//Registers used in armVCM4P10_DequantLumaAC4x4
pQPdiv      RN  10
pQPmod      RN  11
pVRow       RN  2
QPmod       RN  12
shift       RN  14
index0      RN  1
index1      RN  10

;//Registers used in DequantTransformResidualFromPairAndAdd
pDelta      RN  4
pDeltaTmp   RN  6
AC          RN  5                   ;//Load from stack
pPredTemp   RN  7
pDCTemp     RN  8
pDstTemp    RN  9
pDeltaArg1  RN  1
pDeltaArg0  RN  0
QP          RN  1                   ;//Load from stack
DCval       RN  10
predstep    RN  1
dstStep     RN  10
PredVal1    RN  3
PredVal2    RN  5




;// Neon Registers

;// Registers used in armVCM4P10_DequantLumaAC4x4

dVmatrix            DN  D6.8
dindexRow0          DN  D7.32
dindexRow1          DN  D9.32
dByteIndexRow0      DN  D7.8
dByteIndexRow1      DN  D9.8
dVRow0              DN  D8.8
dVRow1              DN  D4.8
dVRow0U16           DN  D8.U16
dVRow1U16           DN  D4.U16
dVRow2U16           DN  D8.U16
dVRow3U16           DN  D4.U16

dShift              DN  D5.U16
dSrcRow0            DN  D0.I16
dSrcRow1            DN  D1.I16
dSrcRow2            DN  D2.I16
dSrcRow3            DN  D3.I16
dDqntRow0           DN  D0.I16
dDqntRow1           DN  D1.I16
dDqntRow2           DN  D2.I16
dDqntRow3           DN  D3.I16

;// Registers used in TransformResidual4x4

;// Packed Input pixels
dIn0                DN  D0.S16
dIn1                DN  D1.S16
dIn2                DN  D2.S16
dIn3                DN  D3.S16
qIn01               QN  Q0.32
qIn23               QN  Q1.32

;// Intermediate calculations
dZero               DN  D4.S16
de0                 DN  D5.S16
de1                 DN  D6.S16
de2                 DN  D7.S16
de3                 DN  D8.S16
dIn1RS              DN  D7.S16
dIn3RS              DN  D8.S16
df0                 DN  D0.S16
df1                 DN  D1.S16
df2                 DN  D2.S16
df3                 DN  D3.S16
qf01                QN  Q0.32
qf23                QN  Q1.32
dg0                 DN  D5.S16
dg1                 DN  D6.S16
dg2                 DN  D7.S16
dg3                 DN  D8.S16
df1RS               DN  D7.S16
df3RS               DN  D8.S16

;// Output pixels
dh0                 DN  D0.S16
dh1                 DN  D1.S16
dh2                 DN  D2.S16
dh3                 DN  D3.S16

;// Registers used in DequantTransformResidualFromPairAndAdd

dDeltaRow0          DN  D0.S16
dDeltaRow1          DN  D1.S16
dDeltaRow2          DN  D2.S16
dDeltaRow3          DN  D3.S16
qDeltaRow01         QN  Q0.S16
qDeltaRow23         QN  Q1.S16

dPredValRow01       DN  D4.U8
dPredValRow23       DN  D5.U8

qSumRow01           QN  Q3.S16
qSumRow23           QN  Q4.S16
dDstRow01           DN  D0.U8
dDstRow23           DN  D1.U8
dDstRow0            DN  D0.32[0]
dDstRow1            DN  D0.32[1]
dDstRow2            DN  D1.32[0]
dDstRow3            DN  D1.32[1]


    ;// Allocate stack memory required by the function
    ;// (32 bytes = 16 coefficients of 16 bits each)
        M_ALLOC8 pBuffer, 32


    ;// Write function header
        M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9

        ;// Define stack arguments
        M_ARG   predStepOnStack, 4
        M_ARG   dstStepOnStack,4
        M_ARG   QPOnStack, 4
        M_ARG   ACOnStack,4


        M_ADR   pDelta,pBuffer
        M_LDR   AC,ACOnStack


        ;// Save registers r1,r2,r3 before function call
        MOV     pPredTemp,pPred
        MOV     pDCTemp,pDC
        MOV     pDstTemp,pDst

        CMP     AC,#0
        BEQ     DCcase
        MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_UnpackBlock4x4

        BL      armVCM4P10_UnpackBlock4x4

        ;//--------------------------------------------------------
        ;// armVCM4P10_DequantLumaAC4x4 : static function inlined
        ;//--------------------------------------------------------

        ;//BL      armVCM4P10_DequantLumaAC4x4
        M_LDR   QP,QPOnStack                                ;// Set up r1 for armVCM4P10_DequantLumaAC4x4

        LDR    pQPmod,=armVCM4P10_QPModuloTable
        LDR    pQPdiv,=armVCM4P10_QPDivTable
        LDR    pVRow,=armVCM4P10_VMatrixU16


        LDRSB  QPmod,[pQPmod,QP]                    ;// (QP%6) * 6
        LDRSB  shift,[pQPdiv,QP]                    ;// Shift = QP / 6

        ;// index0/index1 are loaded only after QP has been consumed:
        ;// index0 shares r1 with QP and index1 shares r10 with pQPdiv
        LDR    index1,=0x03020504
        LDR    index0,=0x05040100                   ;// Indexes into dVmatrix
        ADD    pVRow,pVRow,QPmod
        VDUP   dindexRow0,index0
        VDUP   dindexRow1,index1
        VDUP   dShift,shift

        ;// Load all 4x4 pVRow[] values
        VLD1   dVmatrix,[pVRow]                     ;// dVmatrix = [0d|0c|0b|0a]


        VTBL   dVRow0,dVmatrix,dByteIndexRow0       ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]]
        VTBL   dVRow1,dVmatrix,dByteIndexRow1       ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]]
        CMP     pDCTemp,#0                          ;// Flags stay live until VMOVNE below (NEON ops do not alter them)
        ;// Load all the 4x4 'src' values
        VLD1   { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta]

        VSHL   dVRow0U16,dVRow0U16,dShift
        VSHL   dVRow1U16,dVRow1U16,dShift
        LDRSHNE DCval,[pDCTemp]                     ;// Fetch *pDC only when pDC != NULL


        ;// Multiply src[] with pVRow[]
        VMUL    dDqntRow0,dSrcRow0,dVRow0U16
        VMUL    dDqntRow1,dSrcRow1,dVRow1U16
        VMUL    dDqntRow2,dSrcRow2,dVRow2U16
        VMUL    dDqntRow3,dSrcRow3,dVRow3U16



        ;//-------------------------------------------------------------
        ;// TransformResidual4x4 : Inlined to avoid Load/Stores
        ;//-------------------------------------------------------------


        ;//BL      armVCM4P10_TransformResidual4x4
        ;//STRHNE  DCval,[pDelta]
        VMOVNE    dIn0[0],DCval                     ;// pDC != NULL : coefficient [0] = *pDC



        ;//*****************************************************************
        ;// Transpose the input pixels : perform Row ops as Col ops
        ;//*****************************************************************

        VTRN    dIn0,dIn1
        VTRN    dIn2,dIn3
        VTRN    qIn01,qIn23


        VMOV    dZero,#0                                    ;// Used to right shift by 1


        ;//****************************************
        ;// Row Operations (Performed on columns)
        ;//****************************************


        VADD        de0,dIn0,dIn2                       ;//  e0 = d0 + d2
        VSUB        de1,dIn0,dIn2                       ;//  e1 = d0 - d2
        VHADD       dIn1RS,dIn1,dZero                   ;//  d1>>1 : halving add with dZero (all zeros)
        VHADD       dIn3RS,dIn3,dZero                   ;//  d3>>1
        VSUB        de2,dIn1RS,dIn3                     ;//  e2 = (d1>>1) - d3
        VADD        de3,dIn1,dIn3RS                     ;//  e3 = d1 + (d3>>1)
        VADD        df0,de0,de3                         ;//  f0 = e0 + e3
        VADD        df1,de1,de2                         ;//  f1 = e1 + e2
        VSUB        df2,de1,de2                         ;//  f2 = e1 - e2
        VSUB        df3,de0,de3                         ;//  f3 = e0 - e3



        ;//*****************************************************************
        ;// Transpose the resultant matrix
        ;//*****************************************************************

        VTRN    df0,df1
        VTRN    df2,df3
        VTRN    qf01,qf23


        ;//*******************************
        ;// Column Operations
        ;//*******************************


        VADD        dg0,df0,df2                         ;//  g0 = f0 + f2
        VSUB        dg1,df0,df2                         ;//  g1 = f0 - f2
        VHADD       df1RS,df1,dZero                     ;//  f1>>1 : halving add with dZero (all zeros)
        VHADD       df3RS,df3,dZero                     ;//  f3>>1
        VSUB        dg2,df1RS,df3                       ;//  g2 = (f1>>1) - f3
        VADD        dg3,df1,df3RS                       ;//  g3 = f1 + (f3>>1)
        VADD        dh0,dg0,dg3                         ;//  h0 = g0 + g3
        VADD        dh1,dg1,dg2                         ;//  h1 = g1 + g2
        VSUB        dh2,dg1,dg2                         ;//  h2 = g1 - g2
        VSUB        dh3,dg0,dg3                         ;//  h3 = g0 - g3


        ;//************************************************
        ;// Calculate final value (colOp[i][j] + 32)>>6
        ;//************************************************

        VRSHR       dh0,#6
        VRSHR       dh1,#6
        VRSHR       dh2,#6
        VRSHR       dh3,#6


        B       OutDCcase


DCcase
        ;// Calculate the Transformed DCvalue : (DCval+32)>>6
        ;// pDC may be NULL (the AC path above already allows for that), so a
        ;// missing DC coefficient is taken as 0 instead of loading through NULL.
        CMP     pDCTemp,#0
        MOVEQ   DCval,#0
        LDRSHNE DCval,[pDCTemp]
        ADD     DCval,DCval,#32
        ASR     DCval,DCval,#6

        VDUP    dDeltaRow0, DCval                       ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3]  = DCval
        VDUP    dDeltaRow1, DCval                       ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7]  = DCval
        VDUP    dDeltaRow2, DCval                       ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
        VDUP    dDeltaRow3, DCval                       ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval


OutDCcase
        ;// Residual is in D0-D3 here (dDeltaRow0..3) on both paths
        M_LDR   predstep,predStepOnStack
        M_LDR   dstStep,dstStepOnStack

        ;// Gather the four 4-byte prediction rows into two D registers
        LDR     PredVal1,[pPredTemp],predstep
        LDR     PredVal2,[pPredTemp],predstep
        VMOV    dPredValRow01,PredVal1,PredVal2

        LDR     PredVal1,[pPredTemp],predstep
        LDR     PredVal2,[pPredTemp]
        VMOV    dPredValRow23,PredVal1,PredVal2


        ;// sum = residual + prediction (widening add), then saturate to U8
        VADDW   qSumRow01,qDeltaRow01,dPredValRow01
        VADDW   qSumRow23,qDeltaRow23,dPredValRow23
        VQMOVUN dDstRow01,qSumRow01
        VQMOVUN dDstRow23,qSumRow23


        ;// Store one 32-bit lane (4 pixels) per destination row
        VST1    dDstRow0,[pDstTemp],dstStep
        VST1    dDstRow1,[pDstTemp],dstStep
        VST1    dDstRow2,[pDstTemp],dstStep
        VST1    dDstRow3,[pDstTemp]

        ;// Set return value
        MOV     result,#OMX_Sts_NoErr

End


        ;// Write function tail

        M_END

    ENDIF                                                    ;//CORTEXA8



    END
411