;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe

        M_VARIANTS ARM1136JS



    IF ARM1136JS


        ;// Named local storage slots, accessed below through the
        ;// M_STR / M_LDR / M_STRD / M_LDRD macros (macros come from armCOMM_s.h).
        ;// ppSrc and ppDst are declared but not referenced in this file.
        M_ALLOC8    ppDstArgs, 8        ;// caller's pDst / dstStep pair
        M_ALLOC8    pTempResult1, 8     ;// Acc0 / Acc1 of the previous row pair
        M_ALLOC8    pTempResult2, 8     ;// Acc2 / Acc3 of the previous row pair
        M_ALLOC4    ppSrc, 4
        M_ALLOC4    ppDst, 4
        M_ALLOC4    pDstStep, 4         ;// intermediate-buffer stride (16)
        M_ALLOC4    pSrcStep, 4         ;// caller's srcStep
        M_ALLOC4    pCounter, 4         ;// pass-2 loop counter (r11 is reused there)

        ;// Function header
        ;// Function:
        ;//     armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        ;//
        ;// Implements diagonal interpolation for a block of size 4x4, in two passes:
        ;//   Pass 1: a 6-tap filter (1, -5, 20, 20, -5, 1) along each source row.
        ;//           Ten rows of nine bytes are read starting at pSrc; the 16-bit
        ;//           results (biased by 16*255 so they stay non-negative) are
        ;//           written to the intermediate buffer, 80 bytes in rows of 16.
        ;//   Pass 2: the same 6-tap filter down each column of the intermediate
        ;//           buffer, then bias removal with rounding, unsigned saturation
        ;//           to 18 bits and a shift right by 10, giving one byte per pixel.
        ;// Input and output should be aligned (pSrc and the intermediate buffer
        ;// are accessed with word loads/stores; pDst is written byte by byte).
        ;//
        ;// Registers used as input for this function
        ;//     r0 = pSrc       source pointer
        ;//     r1 = srcStep    source stride in bytes
        ;//     r2 = pDst       destination pointer
        ;//     r3 = dstStep    destination stride in bytes
        ;//     r8 = pInterBuf  intermediate-buffer pointer
        ;//
        ;// Registers preserved for top level function
        ;//     r2, r3 are restored to their entry values before returning.
        ;//     r4, r5, r6, r14 are overwritten in the body; presumably M_START/M_END
        ;//     save and restore them (macros live in armCOMM_s.h - TODO confirm).
        ;//     NOTE(review): the original header also listed r0 and r1 as preserved,
        ;//     but the code returns with r0 = the intermediate-buffer pointer that
        ;//     was passed in r8 and r1 = 16. Confirm what callers rely on.
        ;//
        ;// Registers modified by the function
        ;//     r0, r1 (see note above), r7, r8, r9, r10, r11, r12
        ;//
        ;// Output registers
        ;//     None.

        M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare inner loop registers
;// Acc0-Acc3 deliberately alias ValA-ValD: each accumulator is first written
;// by the last instruction that reads the matching Val register.
Acc0            RN 4
Acc1            RN 5
Acc2            RN 6
Acc3            RN 7

ValA            RN 4
ValB            RN 5
ValC            RN 6
ValD            RN 7
ValE            RN 8
ValF            RN 9
ValG            RN 12
ValH            RN 14
ValI            RN 1

;// Temp1 aliases dstStep and Temp2/ValI alias srcStep; both strides are
;// reloaded from pDstStep / pSrcStep at the end of every pass-1 iteration.
;// Temp4, Temp5 and r0x0fe00fe0 are declared but not referenced below.
Temp1           RN 3
Temp2           RN 1
Temp3           RN 12
Temp4           RN 7
Temp5           RN 5
r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
Counter         RN 11
pInterBuf       RN 8

;// Pass-2 registers. Each holds two 16-bit intermediates of one column:
;// high halfword = first letter, low halfword = second letter.
ValCA           RN 8
ValDB           RN 9
ValGE           RN 10
ValHF           RN 11
r0x00140001     RN 12                                   ;// halfwords [20, 1]
r0x0014fffb     RN 14                                   ;// halfwords [20, -5]

r0x0001fc00     RN 11                                   ;// shares r11 with Counter and ValHF

Accx            RN 8
Accy            RN 9
Temp6           RN 14

        ;// Save the caller's destination; pass 1 writes to the intermediate
        ;// buffer instead, using a fixed stride of 16 bytes.
        M_STRD      pDst, dstStep, ppDstArgs

        MOV         pDst, pInterBuf
        MOV         dstStep, #16

        ;// Counter starts at 4 and the loop repeats while it is still >= 0
        ;// after the decrement (BPL): five iterations, two source rows each.
        MOV         Counter, #4
        M_STR       dstStep, pDstStep
        M_STR       srcStep, pSrcStep
        LDR         r0x00ff00ff, =0x00ff00ff            ;// [0 255 0 255] 255 is offset to avoid negative results

        ;//
        ;// Pass 1: filter along the rows, two rows per iteration.
        ;// In every packed register the low 16-bit lane belongs to row 0 of
        ;// the pair and the high lane to row 1.
        ;//
HeightLoop
NextTwoRowsLoop
        LDR         ValD, [pSrc, srcStep]               ;// Load row 1 [d1 c1 b1 a1]
        LDR         ValA, [pSrc], #4                    ;// Load row 0 [d0 c0 b0 a0]
        LDR         ValH, [pSrc, srcStep]               ;// Load row 1 [h1 g1 f1 e1]
        LDR         ValE, [pSrc], #4                    ;// Load row 0 [h0 g0 f0 e0]
        LDRB        Temp2, [pSrc, srcStep]              ;// Load row 1 byte i1 (overwrites srcStep)
        LDRB        Temp1, [pSrc], #-8                  ;// Load row 0 byte i0, rewind pSrc to row start

        PKHBT       ValB, ValA, ValD, LSL #16           ;// [b1 a1 b0 a0]
        PKHTB       ValD, ValD, ValA, ASR #16           ;// [d1 c1 d0 c0]
        UXTAB16     ValA, r0x00ff00ff, ValB             ;// [00 a1 00 a0] + [0 255 0 255]
        UXTAB16     ValC, r0x00ff00ff, ValD             ;// [00 c1 00 c0] + [0 255 0 255]
        PKHBT       ValI, Temp1, Temp2, LSL #16         ;// [00 i1 00 i0]
        PKHBT       ValF, ValE, ValH, LSL #16           ;// [f1 e1 f0 e0]
        PKHTB       ValH, ValH, ValE, ASR #16           ;// [h1 g1 h0 g0]
        UXTAB16     ValE, r0x00ff00ff, ValF             ;// [00 e1 00 e0] + [0 255 0 255]

        ;// Each accumulator below is built as
        ;//     (outer pair + 255) + 5 * (4 * (centre pair + 255) - (inner pair + 255))
        ;// so every lane ends up as the filter result plus 16*255.
        ;// "Reg, ROR #8" selects the odd bytes of Reg (b, d, f or h).

        ;// Calculate Acc0
        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        UXTAB16     Temp1, ValC, ValD, ROR #8           ;// c + d + 255
        UXTAB16     Temp3, ValE, ValB, ROR #8           ;// e + b + 255
        RSB         Temp1, Temp3, Temp1, LSL #2         ;// 4*(c + d + 255) - (b + e + 255)
        UXTAB16     Acc0, ValA, ValF, ROR #8            ;// a + f + 255 (ValA is dead from here)
        ADD         Temp1, Temp1, Temp1, LSL #2         ;// times 5
        ADD         Acc0, Acc0, Temp1

        ;// Calculate Acc1
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        UXTAB16     Temp1, ValE, ValD, ROR #8           ;// e + d + 255
        UXTAB16     Temp3, ValC, ValF, ROR #8           ;// c + f + 255
        RSB         Temp1, Temp3, Temp1, LSL #2         ;// 4*(d + e + 255) - (c + f + 255)
        UXTAB16     ValG, r0x00ff00ff, ValH             ;// [00 g1 00 g0] + [0 255 0 255]
        ADD         Temp1, Temp1, Temp1, LSL #2         ;// times 5
        UXTAB16     Acc1, ValG, ValB, ROR #8            ;// g + b + 255 (ValB is dead from here)
        ADD         Acc1, Acc1, Temp1

        UXTAB16     Acc2, ValC, ValH, ROR #8            ;// c + h + 255 (ValC is dead from here)
        ADD         ValI, r0x00ff00ff, ValI             ;// [00 i1 00 i0] + [0 255 0 255]

        ;// Calculate Acc2
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        UXTAB16     Temp1, ValG, ValD, ROR #8           ;// g + d + 255
        UXTAB16     Acc3, ValI, ValD, ROR #8            ;// i + d + 255 (last read of ValI and ValD)
        UXTAB16     Temp2, ValE, ValF, ROR #8           ;// e + f + 255

        RSB         Temp1, Temp1, Temp2, LSL #2         ;// 4*(e + f + 255) - (d + g + 255)
        UXTAB16     Temp2, ValG, ValF, ROR #8           ;// g + f + 255, used for Acc3
        ADD         Temp1, Temp1, Temp1, LSL #2         ;// times 5
        ADD         Acc2, Acc2, Temp1

        ;// Calculate Acc3
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        UXTAB16     Temp1, ValE, ValH, ROR #8           ;// e + h + 255
        RSB         Temp1, Temp1, Temp2, LSL #2         ;// 4*(f + g + 255) - (e + h + 255)
        ADD         Temp1, Temp1, Temp1, LSL #2         ;// times 5
        ADD         Acc3, Acc3, Temp1

        ;// r3 and r1 were used as Temp1 / Temp2 above; restore the strides.
        M_LDR       dstStep, pDstStep
        M_LDR       srcStep, pSrcStep

        ;// If Counter is even, park Acc0-Acc3 in the temporary slots.
        ;// If Counter is odd, combine them with the parked values and write
        ;// two rows of the intermediate buffer.
        ANDS        Temp3, Counter, #1
        BEQ         NoProcessing

        ;// Regroup previous pair (rows r, r+1) and current pair (rows r+2, r+3)
        ;// so one buffer row holds [row r+2 | row r] and the next [row r+3 | row r+1].
        M_LDRD      Accx, Accy, pTempResult1
        PKHBT       Temp6, Accx, Acc0, LSL #16          ;// [cur.lo | prev.lo]
        PKHTB       Acc0, Acc0, Accx, ASR #16           ;// [cur.hi | prev.hi]
        STR         Acc0, [pDst, dstStep]
        STR         Temp6, [pDst], #4
        PKHBT       Temp6, Accy, Acc1, LSL #16          ;// [cur.lo | prev.lo]
        PKHTB       Acc1, Acc1, Accy, ASR #16           ;// [cur.hi | prev.hi]
        M_LDRD      Accx, Accy, pTempResult2
        STR         Acc1, [pDst, dstStep]
        STR         Temp6, [pDst], #4

        PKHBT       Temp6, Accx, Acc2, LSL #16          ;// [cur.lo | prev.lo]
        PKHTB       Acc2, Acc2, Accx, ASR #16           ;// [cur.hi | prev.hi]
        STR         Acc2, [pDst, dstStep]
        STR         Temp6, [pDst], #4
        PKHBT       Temp6, Accy, Acc3, LSL #16          ;// [cur.lo | prev.lo]
        PKHTB       Acc3, Acc3, Accy, ASR #16           ;// [cur.hi | prev.hi]
        STR         Acc3, [pDst, dstStep]
        STR         Temp6, [pDst], #-12                 ;// back to the start of this buffer row
        ADD         pDst, pDst, dstStep, LSL #1         ;// skip the two buffer rows just written
        B           AfterStore

NoProcessing
        M_STRD      Acc0, Acc1, pTempResult1
        M_STRD      Acc2, Acc3, pTempResult2
AfterStore
        SUBS        Counter, Counter, #1                ;// Five iterations in total = ten source rows
        ADD         pSrc, pSrc, srcStep, LSL #1         ;// advance two source rows
        BPL         HeightLoop

        ;// The last iteration ran with Counter even, so its results are still
        ;// in Acc0-Acc3; store them as the fifth buffer row.
        STR         Acc0, [pDst], #4                    ;// [row 9 | row 8] of column 0
        STR         Acc1, [pDst], #4
        STR         Acc2, [pDst], #4
        STR         Acc3, [pDst], #-12

        ;//
        ;// Pass 2: filter down the columns of the intermediate buffer using
        ;// multiply-accumulate. One iteration produces the four output pixels
        ;// of one column. (The original comment called this pass "horizontal",
        ;// but the loads step through the buffer by its row stride.)
        ;//

        SUB         pSrc, pDst, dstStep, LSL #2         ;// back to the start of the intermediate buffer
        MOV         srcStep, #16
        M_LDRD      pDst, dstStep, ppDstArgs            ;// restore the caller's pDst / dstStep

        MOV         Counter, #4                         ;// four columns
        LDR         r0x0014fffb, =0x0014fffb
        LDR         r0x00140001, =0x00140001

HeightLoop1
        M_STR       Counter, pCounter                   ;// r11 is reused as ValHF / r0x0001fc00 below

        ;// a..i are the nine intermediates of this column, top to bottom.
        M_LDR       ValCA, [pSrc], srcStep              ;// Load [0 c 0 a]
        M_LDR       ValDB, [pSrc], srcStep              ;// Load [0 d 0 b]
        M_LDR       ValGE, [pSrc], srcStep              ;// Load [0 g 0 e]
        M_LDR       ValHF, [pSrc], srcStep              ;// Load [0 h 0 f]


        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i

        SMUAD       Acc0, ValCA, r0x00140001            ;// Acc0  = 20*c + a
        SMUAD       Acc1, ValDB, r0x00140001            ;// Acc1  = 20*d + b
        SMUADX      Acc2, ValGE, r0x0014fffb            ;// Acc2  = -5*g + 20*e
        SMUAD       Acc3, ValGE, r0x0014fffb            ;// Acc3  = 20*g - 5*e

        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0      ;// Acc0 += 20*d - 5*b
        SMLADX      Acc1, ValGE, r0x00140001, Acc1      ;// Acc1 += g + 20*e
        SMLADX      Acc2, ValHF, r0x00140001, Acc2      ;// Acc2 += h + 20*f
        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3      ;// Acc3 += -5*h + 20*f

        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0      ;// Acc0 += -5*e
        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1      ;// Acc1 += -5*c
        SMLATB      Acc2, ValCA, r0x00140001, Acc2      ;// Acc2 += c
        SMLATB      Acc3, ValDB, r0x00140001, Acc3      ;// Acc3 += d

        LDRH        ValCA, [pSrc], #4                   ;// Load i (low halfword of the fifth buffer row); ValCA now holds i
        SMLABB      Acc0, ValHF, r0x00140001, Acc0      ;// Acc0 += f
        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1      ;// Acc1 += -5*f
        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2      ;// Acc2 += -5*d
        SMLABB      Acc3, ValCA, r0x00140001, Acc3      ;// Acc3 += i

        ;// Remove the pass-1 bias (16*255 per tap, taps sum to 32) while
        ;// leaving +512 in place as the rounding term for the shift by 10.
        LDR         r0x0001fc00, =0x0001fc00            ;// (0xff * 16 * 32) - 512
        SUB         Acc0, Acc0, r0x0001fc00
        SUB         Acc1, Acc1, r0x0001fc00
        SUB         Acc2, Acc2, r0x0001fc00
        SUB         Acc3, Acc3, r0x0001fc00

        ;// Saturate to 18 unsigned bits so the shift by 10 yields 0..255.
        USAT        Acc0, #18, Acc0
        USAT        Acc1, #18, Acc1
        USAT        Acc2, #18, Acc2
        USAT        Acc3, #18, Acc3

        ;// Store the four pixels of this column, one per destination row.
        MOV         Acc0, Acc0, LSR #10
        M_STRB      Acc0, [pDst], dstStep
        MOV         Acc1, Acc1, LSR #10
        M_STRB      Acc1, [pDst], dstStep
        MOV         Acc2, Acc2, LSR #10
        M_STRB      Acc2, [pDst], dstStep
        MOV         Acc3, Acc3, LSR #10
        M_STRB      Acc3, [pDst], dstStep


        M_LDR       Counter, pCounter
        SUB         pDst, pDst, dstStep, LSL #2         ;// back to the first destination row
        SUB         pSrc, pSrc, srcStep, LSL #2         ;// back to the first buffer row; the LDRH's #4 leaves pSrc on the next column
        ADD         pDst, pDst, #1                      ;// next destination column
        SUBS        Counter, Counter, #1
        BGT         HeightLoop1
End
        SUB         pDst, pDst, #4                      ;// pDst back to its entry value
        SUB         pSrc, pSrc, #16                     ;// pSrc back to the start of the intermediate buffer

        M_END

    ENDIF

        END