;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe

    IF CortexA8

;//-------------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
;//
;// Vertical 6-tap filter over nine consecutive source rows (a..i), giving
;// four output rows:
;//      out[n] = sat_u8( (row[n] + row[n+5]
;//                        - dCoeff5  * (row[n+1] + row[n+4])
;//                        + dCoeff20 * (row[n+2] + row[n+3]) + 16) >> 5 )
;//
;// In:    r0 (pSrc)    = address of row a; 8 bytes are loaded from each row
;//        r1 (srcStep) = byte distance between consecutive rows
;//        d30 (dCoeff5), d31 (dCoeff20) = S16 filter coefficients.
;//            NOTE(review): these are never written in this file; presumably
;//            the caller preloads them with 5 and 20 - confirm.
;// Out:   d0, d2, d4, d6 (dAcc0..dAcc3) = output rows 0..3 as U8.
;//            Only bytes 0..3 of each register are filtered pixels: the
;//            multiply-accumulates work on the low D half of each Q
;//            accumulator, so bytes 4..7 come from the unfiltered a+f sums.
;//        Nothing is stored to memory; r2 (pDst) and r3 (dstStep) are
;//        named below but never used by this routine.
;// Clobb: r0 (advanced by 5*srcStep), r12 (ends at row a + 9*srcStep),
;//        q0-q3, d7-d15, q8, q9, d20-d22.
;//
;// NOTE(review): "r11" is an argument to the M_START macro from armCOMM_s.h
;// (not visible here); presumably it names the highest core register the
;// macro has to preserve - confirm against the macro definition.
;//
;// The instruction order is hand-scheduled (see the stall remarks); keep it
;// unchanged unless re-measured.
;//-------------------------------------------------------------------------

        M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r11

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Second row pointer: starts at row f and walks rows f..i
Temp            RN 12

;// Declare Neon registers
dCoeff5         DN 30.S16
dCoeff20        DN 31.S16

;// Source rows a..i, one D register (8 bytes) per row
dSrc0           DN 7.U8
dSrc1           DN 8.U8
dSrc2           DN 9.U8
dSrc3           DN 10.U8
dSrc4           DN 11.U8
dSrc5           DN 12.U8
dSrc6           DN 13.U8
dSrc7           DN 14.U8
dSrc8           DN 15.U8

;// Widened pair sums; dSumBE0/dSumCD0 are the low halves of q8/q9
qSumBE01        QN 8.S16
qSumCD01        QN 9.S16
dSumBE0         DN 16.S16
dSumCD0         DN 18.S16

;// One 16-bit accumulator (Q register) per output row
qAcc01          QN 0.S16
qAcc23          QN 1.S16
qAcc45          QN 2.S16
qAcc67          QN 3.S16

;// Low halves of the accumulators, viewed as S16 (filter arithmetic)
dRes0           DN 0.S16
dRes1           DN 2.S16
dRes2           DN 4.S16
dRes3           DN 6.S16

;// Same D registers viewed as U8 (narrowed final results)
dAcc0           DN 0.U8
dAcc1           DN 2.U8
dAcc2           DN 4.U8
dAcc3           DN 6.U8


;// Temporaries for the Coeff20*(c+d) products (dTmp3 is currently unused)
dTmp0           DN 20.S16
dTmp1           DN 21.S16
dTmp2           DN 22.S16
dTmp3           DN 23.S16


        VLD1        dSrc0, [pSrc], srcStep     ;// [a0 a1 a2 a3 .. ]
        ADD         Temp, pSrc, srcStep, LSL #2 ;// Temp = row a + 5*srcStep = row f
        VLD1        dSrc1, [pSrc], srcStep     ;// [b0 b1 b2 b3 .. ]
        ;// One cycle stall
        VLD1        dSrc5, [Temp], srcStep     ;// [f0 f1 f2 f3 .. ]
        ;// One cycle stall
        VLD1        dSrc2, [pSrc], srcStep     ;// [c0 c1 c2 c3 .. ]
        VADDL       qAcc01, dSrc0, dSrc5       ;// Acc = a+f
        VLD1        dSrc3, [pSrc], srcStep     ;// [d0 d1 d2 d3 .. ]
        ;// One cycle stall
        VLD1        dSrc6, [Temp], srcStep     ;// [g0 g1 g2 g3 .. ]

        VLD1        dSrc4, [pSrc], srcStep     ;// [e0 e1 e2 e3 .. ]
        VLD1        dSrc7, [Temp], srcStep     ;// [h0 h1 h2 h3 .. ]
        VADDL       qSumBE01, dSrc1, dSrc4     ;// b+e
        VADDL       qSumCD01, dSrc2, dSrc3     ;// c+d
        VLD1        dSrc8, [Temp], srcStep     ;// [i0 i1 i2 i3 .. ]
        VMLS        dRes0, dSumBE0, dCoeff5    ;// Acc -= Coeff5*(b+e)
;        VMLA        dRes0, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
        VMUL        dTmp0, dSumCD0, dCoeff20   ;// Tmp0 = Coeff20*(c+d), added to Acc below

;        VLD1        dSrc6, [Temp], srcStep
        ;// Output row 1: same filter shifted down one row (a..f -> b..g)
        VADDL       qSumBE01, dSrc2, dSrc5     ;// b+e
        VADDL       qSumCD01, dSrc3, dSrc4     ;// c+d
        VADDL       qAcc23, dSrc1, dSrc6       ;// Acc = a+f
        VMLS        dRes1, dSumBE0, dCoeff5    ;// Acc -= Coeff5*(b+e)
;        VMLA        dRes1, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
        VMUL        dTmp1, dSumCD0, dCoeff20   ;// Tmp1 = Coeff20*(c+d), added to Acc below

;        VLD1        dSrc7, [Temp], srcStep
        ;// Output row 2: window c..h
        VADDL       qSumBE01, dSrc3, dSrc6     ;// b+e
        VADDL       qSumCD01, dSrc4, dSrc5     ;// c+d
        VADDL       qAcc45, dSrc2, dSrc7       ;// Acc = a+f
        VMLS        dRes2, dSumBE0, dCoeff5    ;// Acc -= Coeff5*(b+e)
;        VMLA        dRes2, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
        VMUL        dTmp2, dSumCD0, dCoeff20   ;// Tmp2 = Coeff20*(c+d), added to Acc below

;        VLD1        dSrc8, [Temp], srcStep     ;// [i0 i1 i2 i3 .. ]
        ;// Output row 3: window d..i
        VADDL       qSumBE01, dSrc4, dSrc7     ;// b+e
        VADDL       qAcc67, dSrc3, dSrc8       ;// Acc = a+f
        VADDL       qSumCD01, dSrc5, dSrc6     ;// c+d
        VMLS        dRes3, dSumBE0, dCoeff5    ;// Acc -= Coeff5*(b+e)
        VADD        dRes0, dRes0, dTmp0        ;// Acc0 += Tmp0
        VADD        dRes1, dRes1, dTmp1        ;// Acc1 += Tmp1
        VADD        dRes2, dRes2, dTmp2        ;// Acc2 += Tmp2
        VMLA        dRes3, dSumCD0, dCoeff20   ;// Acc += Coeff20*(c+d)
;        VMUL        dTmp3, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
;        VADD        dRes3, dRes3, dTmp3

        ;// Round, shift right by 5 and saturate S16 -> U8
        VQRSHRUN    dAcc0, qAcc01, #5
        VQRSHRUN    dAcc1, qAcc23, #5
        VQRSHRUN    dAcc2, qAcc45, #5
        VQRSHRUN    dAcc3, qAcc67, #5

        M_END

    ENDIF



    END