;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_TransformResidual4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//
;// Description:
;// Transform Residual 4x4 Coefficients
;//
;// Syntax: ARM armasm (RVCT), NEON, built for the CortexA8 variant only.
;//


;// Include standard headers

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

;// Import symbols required from other files
;// (For example tables)
;// (none are imported by this file)


;// Set debugging level
;//DEBUG_ON    SETL {TRUE}


;// Guarding implementation by the processor name

        IF CortexA8

;//---------------------------------------------------------------------
;// armVCM4P10_TransformResidual4x4
;//
;// In:    r0 = pDst  destination of the sixteen 16-bit results
;//        r1 = pSrc  source of the sixteen 16-bit coefficients
;// Out:   32 bytes written at [pDst]; no return value is set here
;// Uses:  d0-d8 (d8 is named in M_START - presumably so that the macro
;//        preserves it for the caller; confirm in armCOMM_s.h)
;// Note:  every D-register alias below is typed S16, so the arithmetic
;//        is performed on four signed 16-bit lanes per instruction.
;//---------------------------------------------------------------------

;// ARM Registers

;// Input Registers
pDst        RN 0
pSrc        RN 1


;// Neon Registers

;// Packed input coefficients
;// After the de-interleaving VLD4, lane k of dIn<j> holds source
;// element 4*k + j.
dIn0        DN D0.S16
dIn1        DN D1.S16
dIn2        DN D2.S16
dIn3        DN D3.S16

;// Intermediate calculations
;// Several aliases deliberately share a physical register; the
;// instruction order below depends on this sharing.
dZero       DN D4.S16       ;// all-zero operand for the VHADD halving
de0         DN D5.S16
de1         DN D6.S16
de2         DN D7.S16
de3         DN D8.S16
dIn1RS      DN D7.S16       ;// dIn1 >> 1 (same register as de2)
dIn3RS      DN D8.S16       ;// dIn3 >> 1 (same register as de3)
df0         DN D0.S16       ;// f values overwrite the inputs
df1         DN D1.S16
df2         DN D2.S16
df3         DN D3.S16
qf01        QN Q0.32        ;// {df0,df1} viewed as 32-bit lanes
qf23        QN Q1.32        ;// {df2,df3} viewed as 32-bit lanes
dg0         DN D5.S16
dg1         DN D6.S16
dg2         DN D7.S16
dg3         DN D8.S16
df1RS       DN D7.S16       ;// df1 >> 1 (same register as dg2)
df3RS       DN D8.S16       ;// df3 >> 1 (same register as dg3)

;// Output values
dh0         DN D0.S16
dh1         DN D1.S16
dh2         DN D2.S16
dh3         DN D3.S16


        ;// Allocate stack memory required by the function
        ;// (none is allocated)

        ;// Write function header
        M_START armVCM4P10_TransformResidual4x4, ,d8

        ;******************************************************************
        ;// The strategy used in implementing the transform is as follows:*
        ;// Load the 4x4 block into 4 D registers, de-interleaved by VLD4 *
        ;//   so that it arrives already transposed                       *
        ;// Perform the first butterfly pass using SIMD                   *
        ;// Transpose the 4x4 intermediate matrix                         *
        ;// Perform the second butterfly pass using SIMD                  *
        ;// Round, scale and store the 4x4 block at one go                *
        ;******************************************************************

        ;// Load all the 4x4 coefficients in transposed form

        VLD4    {dIn0,dIn1,dIn2,dIn3},[pSrc]

        VMOV    dZero,#0                    ;// Zero operand: VHADD x,0 gives x >> 1


        ;****************************************
        ;// Row Operations (Performed on columns)
        ;// d -> e -> f
        ;****************************************


        VADD    de0,dIn0,dIn2               ;// e0 = d0 + d2
        VSUB    de1,dIn0,dIn2               ;// e1 = d0 - d2
        VHADD   dIn1RS,dIn1,dZero           ;// d1 >> 1 (halving add with zero)
        VHADD   dIn3RS,dIn3,dZero           ;// d3 >> 1 (halving add with zero)
        VSUB    de2,dIn1RS,dIn3             ;// e2 = (d1>>1) - d3
        VADD    de3,dIn1,dIn3RS             ;// e3 = d1 + (d3>>1)
        VADD    df0,de0,de3                 ;// f0 = e0 + e3
        VADD    df1,de1,de2                 ;// f1 = e1 + e2
        VSUB    df2,de1,de2                 ;// f2 = e1 - e2
        VSUB    df3,de0,de3                 ;// f3 = e0 - e3



        ;*****************************************************************
        ;// Transpose the resultant matrix
        ;// Two 16-bit lane transposes followed by one 32-bit lane
        ;// transpose across the register pairs.
        ;*****************************************************************

        VTRN    df0,df1
        VTRN    df2,df3
        VTRN    qf01,qf23


        ;*******************************
        ;// Column Operations
        ;// f -> g -> h
        ;*******************************


        VADD    dg0,df0,df2                 ;// g0 = f0 + f2
        VSUB    dg1,df0,df2                 ;// g1 = f0 - f2
        VHADD   df1RS,df1,dZero             ;// f1 >> 1 (halving add with zero)
        VHADD   df3RS,df3,dZero             ;// f3 >> 1 (halving add with zero)
        VSUB    dg2,df1RS,df3               ;// g2 = (f1>>1) - f3
        VADD    dg3,df1,df3RS               ;// g3 = f1 + (f3>>1)
        VADD    dh0,dg0,dg3                 ;// h0 = g0 + g3
        VADD    dh1,dg1,dg2                 ;// h1 = g1 + g2
        VSUB    dh2,dg1,dg2                 ;// h2 = g1 - g2
        VSUB    dh3,dg0,dg3                 ;// h3 = g0 - g3


        ;************************************************
        ;// Calculate final value (colOp[i][j] + 32)>>6
        ;// VRSHR is the rounding shift, so the +32 is implicit.
        ;************************************************

        VRSHR   dh0,#6
        VRSHR   dh1,#6
        VRSHR   dh2,#6
        VRSHR   dh3,#6


        ;***************************
        ;// Store all the 4x4 values
        ;***************************

        VST1    {dh0,dh1,dh2,dh3},[pDst]


        ;// Set return value
        ;// (nothing is set here)

End


        ;// Write function tail
        M_END

        ENDIF                               ;//CortexA8

        END