;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

    IF ARM1136JS

;// Declare input registers
;// These are fixed register assignments shared by both routines in this file;
;// they do not follow the usual r0-r3 argument order.
pSrc            RN 0        ;// in: source pointer (any alignment); out: pointer into the aligned copy
srcStep         RN 1        ;// in: source row step in bytes;       out: row step of the aligned copy
pDst            RN 8        ;// in: aligned destination buffer; advanced as rows are stored
iHeight         RN 9        ;// in: number of rows to copy; counted down to zero
;// Declare inner loop registers
x               RN 7        ;// pSrc & 3; shares r7 with x0 and is dead once the switch is taken
x0              RN 7        ;// first  word of the current row
x1              RN 10       ;// second word of the current row
x2              RN 11       ;// third  word of the current row
Scratch         RN 12       ;// keeps the entry value of pDst (used by HorAlign9x only)

;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned
;// destination pointed by (pDst) for horizontal interpolation.
;// This function needs to copy 9 bytes in horizontal direction.
;//
;// For each row, three consecutive words are loaded from pSrc rounded down to a
;// word boundary, shifted right by (pSrc & 3) bytes and stored as three words
;// (12 bytes) at pDst. The top (pSrc & 3) bytes of the third stored word are
;// zero, so between 9 and 12 source bytes per row are carried over.
;//
;// The misalignment is computed once, before the row loop, so every row is
;// assumed to share it.
;// NOTE(review): this implies srcStep is a multiple of 4 - confirm with callers.
;// NOTE(review): the shifts treat bits [7:0] of a loaded word as the byte at
;//               the lowest address (little-endian data) - confirm.
;//
;// The row loops test iHeight after the store, so one row is always copied,
;// even when iHeight <= 0 on entry.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (the value pDst had on entry)
;// r1 - step size to this aligned location (12 bytes)

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Copy pDst to scratch: pDst is advanced by the stores below and its
        ;// entry value is returned in r0
        MOV     Scratch, pDst

StartAlignedStackCopy
        AND     x, pSrc, #3                 ;// x = byte offset of pSrc within its word
        BIC     pSrc, pSrc, #3              ;// round pSrc down to a word boundary

        ;// Dispatch on x (0..3); M_SWITCH/M_CASE/M_ENDSWITCH are macros that
        ;// are not defined in this file
        M_SWITCH x
        M_CASE   Copy0toAligned
        M_CASE   Copy1toAligned
        M_CASE   Copy2toAligned
        M_CASE   Copy3toAligned
        M_ENDSWITCH

        ;// Offset 0: the source row is already word aligned, copy it unchanged
Copy0toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1        ;// flags are consumed by the BGT below
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy0toAligned
        B       CopyEnd

        ;// Offset 1: drop the low byte of each word and pull in the low byte
        ;// of the following word (none of the MOV/ORR below alter the flags)
Copy1toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24
        MOV     x2, x2, LSR #8
        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy1toAligned
        B       CopyEnd

        ;// Offset 2: same scheme with a two byte shift
Copy2toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16
        MOV     x2, x2, LSR #16
        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy2toAligned
        B       CopyEnd

        ;// Offset 3: same scheme with a three byte shift; falls through to
        ;// CopyEnd when the last row has been stored
Copy3toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8
        MOV     x2, x2, LSR #24
        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy3toAligned

CopyEnd

        MOV     pSrc, Scratch               ;// r0 = start of the aligned copy
        MOV     srcStep, #12                ;// r1 = 3 words per stored row

        M_END


;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned
;// destination pointed by (pDst) for vertical interpolation.
;// This function needs to copy 4 bytes in horizontal direction
;//
;// For each row, the word at pSrc rounded down to a word boundary is loaded
;// (plus the following word when pSrc is not word aligned), the four bytes
;// starting at the original byte offset are assembled into one word, and that
;// word is stored at pDst, which advances by 4 bytes per row.
;//
;// The misalignment is computed once, before the row loop, so every row is
;// assumed to share it.
;// NOTE(review): this implies srcStep is a multiple of 4 - confirm with callers.
;// NOTE(review): the shifts treat bits [7:0] of a loaded word as the byte at
;//               the lowest address (little-endian data) - confirm.
;//
;// The row loops test iHeight after the store, so one row is always copied,
;// even when iHeight <= 0 on entry.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;// NOTE(review): the instructions visible here only write r0,r1,r7,r8,r9,r10;
;//               the list above is kept as originally documented.
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (final pDst - 28, i.e. entry pDst + 4*rows - 28)
;// r1 - step size to this aligned location (4 bytes)

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        ;// Copy the rows at pSrc to the aligned buffer at pDst
StartVAlignedStackCopy
        AND     x, pSrc, #3                 ;// x = byte offset of pSrc within its word
        BIC     pSrc, pSrc, #3              ;// round pSrc down to a word boundary


        ;// Dispatch on x (0..3); M_SWITCH/M_CASE/M_ENDSWITCH are macros that
        ;// are not defined in this file
        M_SWITCH x
        M_CASE   Copy0toVAligned
        M_CASE   Copy1toVAligned
        M_CASE   Copy2toVAligned
        M_CASE   Copy3toVAligned
        M_ENDSWITCH

        ;// Offset 0: the source word is already aligned, copy it unchanged.
        ;// M_LDR is a macro that is not defined in this file; it is used here
        ;// in post-indexed form (presumably: load from pSrc, then pSrc += srcStep).
Copy0toVAligned
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1        ;// flags are consumed by the BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy0toVAligned
        B       CopyVEnd

        ;// Offset 1: upper three bytes of the current word followed by the
        ;// low byte of the next word. The next word is read first, because
        ;// the M_LDR moves pSrc on to the following row.
Copy1toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy1toVAligned
        B       CopyVEnd

        ;// Offset 2: upper two bytes of the current word, low two of the next
Copy2toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy2toVAligned
        B       CopyVEnd

        ;// Offset 3: top byte of the current word, low three of the next;
        ;// falls through to CopyVEnd when the last row has been stored
Copy3toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy3toVAligned

CopyVEnd

        ;// NOTE(review): the fixed 28 byte rewind is independent of iHeight.
        ;// With 9 rows copied (36 bytes) it yields the third stored row -
        ;// confirm that callers always pass iHeight == 9.
        SUB     pSrc, pDst, #28
        MOV     srcStep, #4                 ;// r1 = one word per stored row

        M_END


    ENDIF

    END
